framework,version,device,op_name,kernel_source,batch_size,isl,num_heads,num_key_value_heads,head_dim,beam_width,attn_dtype,kv_cache_dtype,step,latency
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,64,128,1,float16,float16,1,0.13671200275421141
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,64,128,1,float16,float16,3,0.13653600215911865
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,64,128,1,float16,fp8,1,0.16539039611816406
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,64,128,1,float16,fp8,3,0.16525119543075562
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,64,128,1,float16,float16,7,0.13690240383148194
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,64,128,1,float16,fp8,7,0.16526559591293336
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,64,128,1,float16,float16,15,0.13708640336990358
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,64,128,1,float16,fp8,15,0.16394879817962646
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,64,128,1,float16,float16,31,0.14236960411071778
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,64,128,1,float16,fp8,31,0.16360000371932984
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,64,128,1,float16,float16,63,0.14618079662322997
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,64,128,1,float16,fp8,63,0.17587679624557495
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,64,128,1,float16,float16,127,0.15201280117034913
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,64,128,1,float16,float16,255,0.2204655885696411
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,64,128,1,float16,fp8,255,0.2359776020050049
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,64,128,1,float16,float16,511,0.3938127994537354
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,64,128,1,float16,fp8,511,0.35591840744018555
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,64,128,1,float16,float16,1023,0.728656005859375
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,64,128,1,float16,fp8,3,0.021054400503635405
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,64,128,1,float16,float16,7,0.020182399451732634
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,64,128,1,float16,float16,2047,1.4034192085266113
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,64,128,1,float16,fp8,7,0.02101600021123886
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,64,128,1,float16,float16,15,0.020417599380016326
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,64,128,1,float16,fp8,15,0.02096959948539734
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,64,128,1,float16,fp8,31,0.02107200026512146
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,64,128,1,float16,float16,31,0.02011200040578842
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,64,128,1,float16,float16,63,0.02022079974412918
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,64,128,1,float16,float16,127,0.020366400480270386
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,64,128,1,float16,fp8,63,0.020902399718761445
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,64,128,1,float16,fp8,127,0.021080000698566435
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,64,128,1,float16,float16,255,0.021830399334430695
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,64,128,1,float16,fp8,255,0.023086400330066682
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,64,128,1,float16,float16,511,0.02475679963827133
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,64,128,1,float16,fp8,511,0.02662079930305481
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,64,128,1,float16,float16,1023,0.029499199986457825
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,64,128,1,float16,fp8,1023,0.029782399535179138
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,64,128,1,float16,float16,2047,0.04691520035266876
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,64,128,1,float16,fp8,2047,0.03811039924621582
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,64,128,1,float16,float16,1,0.02051839977502823
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,64,128,1,float16,float16,3,0.020623999834060668
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,64,128,1,float16,fp8,1,0.02168319970369339
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,64,128,1,float16,fp8,3,0.02144639939069748
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,64,128,1,float16,float16,7,0.020740799605846405
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,64,128,1,float16,fp8,7,0.02156960070133209
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,64,128,1,float16,float16,15,0.020815999805927278
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,64,128,1,float16,fp8,15,0.02140959948301315
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,64,128,1,float16,fp8,127,0.18575520515441896
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,64,128,1,float16,float16,31,0.020838400721549986
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,64,128,1,float16,fp8,31,0.021590399742126464
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,64,128,1,float16,fp8,63,0.021753600239753722
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,64,128,1,float16,float16,63,0.020971199870109557
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,64,128,1,float16,float16,127,0.02088959962129593
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,64,128,1,float16,fp8,127,0.021827200055122377
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,64,128,1,float16,float16,255,0.0227183997631073
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,64,128,1,float16,fp8,255,0.023577600717544556
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,64,128,1,float16,float16,511,0.029264000058174134
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,64,128,1,float16,fp8,511,0.027835199236869813
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,64,128,1,float16,float16,1023,0.0469871997833252
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,64,128,1,float16,fp8,1023,0.03980000019073486
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,64,128,1,float16,float16,1,0.07780159711837768
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,64,128,1,float16,float16,2047,0.06866880059242249
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,64,128,1,float16,fp8,2047,0.055638402700424194
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,64,128,1,float16,fp8,1,0.09054560065269471
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,64,128,1,float16,float16,3,0.07749119997024537
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,64,128,1,float16,fp8,1,0.021247999370098115
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,64,128,1,float16,float16,1,0.020580799877643587
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,64,128,1,float16,fp8,3,0.09022079706192017
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,64,128,1,float16,float16,3,0.020505599677562714
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,64,128,1,float16,float16,7,0.0776144027709961
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,64,128,1,float16,fp8,1023,0.5622928142547607
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,64,128,1,float16,fp8,7,0.09066720008850097
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,64,128,1,float16,float16,15,0.07783520221710205
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,64,128,1,float16,float16,31,0.07749919891357422
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,64,128,1,float16,fp8,15,0.09085599780082702
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,64,128,1,float16,fp8,31,0.0910863995552063
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,64,128,1,float16,float16,63,0.08461440205574036
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,64,128,1,float16,fp8,2047,0.9234144210815429
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,64,128,1,float16,float16,127,0.0876688003540039
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,64,128,1,float16,float16,511,0.2098383903503418
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,64,128,1,float16,fp8,63,0.0950447976589203
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,64,128,1,float16,fp8,127,0.10385600328445435
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,64,128,1,float16,float16,255,0.12489440441131591
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,64,128,1,float16,fp8,255,0.13115520477294923
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,64,128,1,float16,float16,1023,0.3791856050491333
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,64,128,1,float16,float16,1,0.02486719936132431
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,64,128,1,float16,float16,3,0.024582399427890776
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,64,128,1,float16,fp8,1,0.025963199138641358
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,64,128,1,float16,fp8,511,0.19104479551315307
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,64,128,1,float16,fp8,1023,0.29150240421295165
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,64,128,1,float16,float16,2047,0.7144464015960693
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,64,128,1,float16,float16,7,0.024809600412845613
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,64,128,1,float16,fp8,7,0.026052799820899964
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,64,128,1,float16,fp8,3,0.02601119875907898
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,64,128,1,float16,float16,15,0.024564799666404725
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,64,128,1,float16,fp8,2047,0.4777088165283203
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,64,128,1,float16,float16,127,0.024617600440979003
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,64,128,1,float16,fp8,15,0.02622720003128052
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,64,128,1,float16,float16,31,0.024659200012683867
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,64,128,1,float16,fp8,31,0.026332798600196838
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,64,128,1,float16,float16,63,0.024740800261497498
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,64,128,1,float16,fp8,63,0.026155200600624085
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,64,128,1,float16,fp8,127,0.026233598589897156
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,64,128,1,float16,fp8,255,0.030324798822402955
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,64,128,1,float16,float16,255,0.030025601387023926
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,64,128,1,float16,fp8,1023,0.06000319719314575
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,64,128,1,float16,float16,511,0.04702720046043396
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,64,128,1,float16,fp8,511,0.0406031996011734
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,64,128,1,float16,float16,1023,0.07144799828529358
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,64,128,1,float16,float16,2047,0.1138159990310669
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,64,128,1,float16,fp8,2047,0.08390560150146484
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,64,128,1,float16,float16,1,0.2540704011917114
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,64,128,1,float16,fp8,1,0.31095359325408933
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,64,128,1,float16,float16,3,0.25161919593811033
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,64,128,1,float16,fp8,3,0.3090528011322021
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,64,128,1,float16,float16,7,0.25015840530395506
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,64,128,1,float16,fp8,7,0.3089792013168335
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,64,128,1,float16,float16,15,0.2563055992126465
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,64,128,1,float16,fp8,31,0.32588160037994385
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,64,128,1,float16,fp8,15,0.3099168062210083
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,64,128,1,float16,float16,31,0.2687376022338867
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,64,128,1,float16,float16,63,0.2684560060501099
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,64,128,1,float16,fp8,63,0.3302783966064453
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,64,128,1,float16,float16,127,0.2790271997451782
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,64,128,1,float16,fp8,127,0.3482896089553833
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,64,128,1,float16,float16,255,0.41070241928100587
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,64,128,1,float16,fp8,255,0.4395296096801758
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,64,128,1,float16,float16,511,0.7699984073638916
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,64,128,1,float16,float16,1,0.4803167819976807
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,64,128,1,float16,fp8,511,0.6668047904968262
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,64,128,1,float16,fp8,1,0.5982096195220947
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,64,128,1,float16,float16,3,0.47946557998657224
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,64,128,1,float16,float16,7,0.4895328044891357
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,64,128,1,float16,fp8,3,0.5965648174285889
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,64,128,1,float16,fp8,1023,1.0703264236450196
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,64,128,1,float16,float16,1023,1.4233728408813477
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,64,128,1,float16,float16,15,0.5062047958374023
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,64,128,1,float16,float16,31,0.5082704067230225
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,64,128,1,float16,fp8,7,0.5969583988189697
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,64,128,1,float16,fp8,15,0.5944416046142578
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,64,128,1,float16,fp8,31,0.6279344081878662
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,64,128,1,float16,float16,63,0.5126336097717286
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,64,128,1,float16,float16,127,0.5318031787872315
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,64,128,1,float16,fp8,63,0.6377567768096923
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,64,128,1,float16,fp8,127,0.6617919921875
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,64,128,1,float16,float16,255,0.7868159770965576
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,64,128,1,float16,float16,1,0.9827712059020997
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,64,128,1,float16,fp8,255,0.8503791809082031
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,64,128,1,float16,fp8,1,1.1381919860839844
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,64,128,1,float16,float16,3,0.9904015541076661
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,64,128,1,float16,float16,7,0.9913104057312012
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,64,128,1,float16,fp8,3,1.141494369506836
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,64,128,1,float16,fp8,7,1.1874863624572753
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,64,128,1,float16,float16,15,1.0022128105163575
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,64,128,1,float16,fp8,15,1.2136672019958497
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,64,128,1,float16,float16,31,0.9921168327331543
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,64,128,1,float16,float16,1,0.03301919996738434
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,64,128,1,float16,fp8,1,0.03472320139408112
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,64,128,1,float16,float16,3,0.032183998823165895
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,64,128,1,float16,float16,63,0.9853584289550781
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,64,128,1,float16,fp8,31,1.2217647552490234
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,64,128,1,float16,float16,7,0.03177280128002167
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,64,128,1,float16,fp8,3,0.035950401425361635
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,64,128,1,float16,fp8,7,0.034862399101257324
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,64,128,1,float16,float16,15,0.03162719905376434
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,64,128,1,float16,fp8,63,1.2294207572937013
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,64,128,1,float16,fp8,15,0.03437120020389557
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,64,128,1,float16,float16,127,1.03712158203125
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,64,128,1,float16,float16,127,0.035025599598884585
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,64,128,1,float16,float16,31,0.03187040090560913
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,64,128,1,float16,fp8,31,0.03598400056362152
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,64,128,1,float16,float16,63,0.032227200269699094
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,64,128,1,float16,fp8,127,1.3148608207702637
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,64,128,1,float16,fp8,63,0.03455039858818054
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,64,128,1,float16,fp8,127,0.03442879915237427
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,64,128,1,float16,fp8,255,0.04622719883918762
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,64,128,1,float16,float16,255,0.053887999057769774
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,64,128,1,float16,float16,511,0.07126880288124085
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,64,128,1,float16,fp8,511,0.06521440148353577
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,64,128,1,float16,float16,1023,0.11540800333023071
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,64,128,1,float16,float16,2047,0.19935840368270874
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,64,128,1,float16,fp8,1023,0.09778079986572266
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,64,128,1,float16,fp8,2047,0.13846880197525024
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,64,128,1,float16,float16,1,1.9331119537353516
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,64,128,1,float16,float16,3,1.9336528778076172
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,64,128,1,float16,fp8,1,2.389302444458008
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,64,128,1,float16,float16,7,1.953264045715332
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,64,128,1,float16,fp8,3,2.4310575485229493
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,64,128,1,float16,float16,15,1.9560895919799806
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,64,128,1,float16,fp8,7,2.41768798828125
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,64,128,1,float16,fp8,15,2.409582328796387
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,64,128,1,float16,float16,31,1.943832015991211
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,64,128,1,float16,float16,63,1.9364751815795898
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,64,128,1,float16,fp8,31,2.3938320159912108
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,64,128,1,float16,fp8,63,2.4066463470458985
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,64,128,1,float16,float16,1,3.8241950988769533
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,64,128,1,float16,float16,3,3.8026737213134765
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,64,128,1,float16,fp8,1,4.794729614257813
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,64,128,1,float16,float16,7,3.8335807800292967
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,64,128,1,float16,float16,1,0.046374401450157164
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,64,128,1,float16,fp8,3,4.719236755371094
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,64,128,1,float16,float16,3,0.046323201060295104
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,64,128,1,float16,fp8,1,0.052167999744415286
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,64,128,1,float16,fp8,3,0.052288001775741576
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,64,128,1,float16,float16,7,0.04631519913673401
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,64,128,1,float16,float16,15,3.8069873809814454
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,64,128,1,float16,fp8,7,0.05208960175514221
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,64,128,1,float16,float16,15,0.04631519913673401
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,64,128,1,float16,float16,31,0.04638879895210266
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,64,128,1,float16,fp8,15,0.05189759731292724
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,64,128,1,float16,fp8,7,4.7140239715576175
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,64,128,1,float16,fp8,31,0.0522383987903595
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,64,128,1,float16,float16,63,0.04756959974765777
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,64,128,1,float16,fp8,63,0.051923197507858274
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,64,128,1,float16,float16,127,0.05445600152015686
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,64,128,1,float16,fp8,127,0.05590720176696777
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,64,128,1,float16,fp8,15,4.71539535522461
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,64,128,1,float16,float16,255,0.07651039958000183
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,64,128,1,float16,fp8,255,0.07567520141601562
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,64,128,1,float16,float16,31,3.8424224853515625
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,64,128,1,float16,float16,1023,0.20250558853149414
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,64,128,1,float16,float16,511,0.11840159893035888
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,64,128,1,float16,fp8,511,0.10547200441360474
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,64,128,1,float16,fp8,1023,0.15748000144958496
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,48,48,128,1,float16,float16,1,0.10379359722137452
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,48,48,128,1,float16,fp8,1,0.12291359901428223
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,64,128,1,float16,fp8,31,4.683193588256836
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,48,48,128,1,float16,float16,3,0.10470880270004272
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,64,128,1,float16,fp8,2047,0.25144000053405763
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,64,128,1,float16,float16,2047,0.3689136028289795
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,48,48,128,1,float16,fp8,3,0.12386560440063477
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,48,48,128,1,float16,float16,7,0.10397440195083618
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,48,48,128,1,float16,fp8,7,0.12298879623413086
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,48,48,128,1,float16,float16,15,0.1038432002067566
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,48,48,128,1,float16,fp8,15,0.1254415988922119
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,48,48,128,1,float16,float16,31,0.10700160264968872
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,48,48,128,1,float16,fp8,31,0.12943359613418579
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,48,48,128,1,float16,float16,63,0.11289600133895875
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,48,48,128,1,float16,fp8,63,0.13284319639205933
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,48,48,128,1,float16,float16,127,0.11760640144348145
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,48,48,128,1,float16,fp8,127,0.14105919599533082
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,48,48,128,1,float16,float16,255,0.17402080297470093
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,48,48,128,1,float16,fp8,255,0.1802623987197876
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,48,48,128,1,float16,float16,511,0.30530719757080077
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,48,48,128,1,float16,fp8,511,0.27733280658721926
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,48,48,128,1,float16,float16,1,0.019670400023460387
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,48,48,128,1,float16,float16,1023,0.5500448226928711
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,48,48,128,1,float16,fp8,1,0.02144639939069748
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,48,48,128,1,float16,fp8,1023,0.4208943843841553
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,48,48,128,1,float16,float16,3,0.020596800744533537
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,48,48,128,1,float16,fp8,3,0.020880000293254854
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,48,48,128,1,float16,float16,7,0.019950400292873382
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,48,48,128,1,float16,fp8,7,0.021166400611400606
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,48,48,128,1,float16,fp8,2047,0.6986048221588135
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,48,48,128,1,float16,float16,2047,1.0682687759399414
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,48,48,128,1,float16,float16,15,0.020022399723529816
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,48,48,128,1,float16,float16,127,0.020287999510765077
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,48,48,128,1,float16,fp8,15,0.021507200598716737
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,48,48,128,1,float16,float16,31,0.019985599815845488
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,48,48,128,1,float16,fp8,31,0.02064319998025894
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,48,48,128,1,float16,float16,63,0.020177599787712098
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,48,48,128,1,float16,fp8,63,0.021217599511146545
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,48,48,128,1,float16,fp8,127,0.02057439982891083
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,48,48,128,1,float16,float16,255,0.022430400550365447
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,48,48,128,1,float16,fp8,255,0.023153600096702576
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,48,48,128,1,float16,float16,511,0.025124800205230714
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,48,48,128,1,float16,fp8,511,0.026148799061775207
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,48,48,128,1,float16,float16,1023,0.027857598662376405
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,48,48,128,1,float16,fp8,1023,0.03261280059814453
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,48,48,128,1,float16,float16,2047,0.0488400012254715
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,48,48,128,1,float16,fp8,2047,0.04077120125293732
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,48,48,128,1,float16,float16,1,0.020880000293254854
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,48,48,128,1,float16,fp8,1,0.02128320038318634
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,48,48,128,1,float16,float16,3,0.020633600652217865
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,48,48,128,1,float16,fp8,3,0.02130720019340515
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,48,48,128,1,float16,float16,7,0.020576000213623047
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,48,48,128,1,float16,fp8,7,0.021396799385547637
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,48,48,128,1,float16,float16,15,0.020895999670028687
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,48,48,128,1,float16,fp8,15,0.02173759937286377
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,48,48,128,1,float16,float16,31,0.020934399962425233
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,48,48,128,1,float16,fp8,31,0.02141280025243759
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,48,48,128,1,float16,float16,63,0.02064319998025894
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,48,48,128,1,float16,fp8,63,0.02141280025243759
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,48,48,128,1,float16,float16,127,0.020623999834060668
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,48,48,128,1,float16,fp8,127,0.0215488001704216
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,48,48,128,1,float16,float16,255,0.022729599475860597
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,48,48,128,1,float16,float16,2047,0.06253119707107543
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,48,48,128,1,float16,fp8,255,0.023919999599456787
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,48,48,128,1,float16,float16,511,0.026163199543952943
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,48,48,128,1,float16,fp8,511,0.02740960121154785
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,48,48,128,1,float16,float16,1023,0.045326399803161624
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,48,48,128,1,float16,fp8,1023,0.03933599889278412
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,48,48,128,1,float16,fp8,2047,0.05950719714164734
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,48,48,128,1,float16,float16,1,0.06333919763565063
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,48,48,128,1,float16,fp8,1,0.07203680276870728
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,48,48,128,1,float16,float16,3,0.06369439959526062
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,48,48,128,1,float16,fp8,3,0.07319840192794799
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,48,48,128,1,float16,float16,7,0.06235359907150269
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,48,48,128,1,float16,fp8,7,0.07220640182495117
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,48,48,128,1,float16,float16,15,0.06262239813804626
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,48,48,128,1,float16,fp8,15,0.07235519886016846
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,48,48,128,1,float16,float16,31,0.06339840292930603
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,48,48,128,1,float16,fp8,31,0.07211999893188477
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,48,48,128,1,float16,float16,63,0.06946719884872436
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,48,48,128,1,float16,fp8,63,0.07429440021514892
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,48,48,128,1,float16,float16,127,0.07264959812164307
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,48,48,128,1,float16,fp8,127,0.08322880268096924
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,48,48,128,1,float16,float16,255,0.1042639970779419
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,48,48,128,1,float16,fp8,255,0.10451359748840332
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,48,48,128,1,float16,float16,511,0.16584479808807373
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,48,48,128,1,float16,fp8,511,0.15130079984664918
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,48,48,128,1,float16,float16,1023,0.29231839179992675
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,48,48,128,1,float16,fp8,1023,0.22733280658721924
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,48,48,128,1,float16,float16,1,0.024673600494861603
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,48,48,128,1,float16,fp8,1,0.025966399908065797
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,48,48,128,1,float16,float16,3,0.024643200635910033
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,48,48,128,1,float16,fp8,3,0.02617279887199402
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,48,48,128,1,float16,float16,2047,0.5436336040496826
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,48,48,128,1,float16,fp8,2047,0.3640768051147461
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,48,48,128,1,float16,float16,7,0.024531200528144836
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,48,48,128,1,float16,fp8,7,0.026089599728584288
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,48,48,128,1,float16,float16,15,0.024560000002384185
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,48,48,128,1,float16,fp8,15,0.02598559856414795
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,48,48,128,1,float16,float16,31,0.02467840015888214
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,48,48,128,1,float16,fp8,31,0.026017600297927858
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,48,48,128,1,float16,float16,63,0.02452159970998764
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,48,48,128,1,float16,fp8,63,0.025908800959587096
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,48,48,128,1,float16,float16,127,0.02468799948692322
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,48,48,128,1,float16,fp8,127,0.025944000482559203
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,48,48,128,1,float16,float16,255,0.02858400046825409
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,48,48,128,1,float16,fp8,255,0.03009119927883148
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,48,48,128,1,float16,float16,511,0.04310239851474762
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,48,48,128,1,float16,fp8,511,0.0377920001745224
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,48,48,128,1,float16,float16,1023,0.061715197563171384
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,48,48,128,1,float16,fp8,1023,0.05999199748039245
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,48,48,128,1,float16,float16,2047,0.09582719802856446
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,48,48,128,1,float16,fp8,2047,0.08514879941940308
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,48,48,128,1,float16,float16,1,0.1928447961807251
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,48,48,128,1,float16,fp8,1,0.2351327896118164
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,48,48,128,1,float16,float16,3,0.19354239702224732
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,48,48,128,1,float16,fp8,3,0.23531839847564698
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,48,48,128,1,float16,float16,7,0.19468799829483033
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,48,48,128,1,float16,fp8,31,0.24144959449768066
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,48,48,128,1,float16,fp8,7,0.23588318824768068
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,48,48,128,1,float16,float16,15,0.19494880437850953
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,48,48,128,1,float16,fp8,15,0.23852639198303222
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,48,48,128,1,float16,float16,31,0.20422239303588868
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,48,48,128,1,float16,float16,63,0.2062096118927002
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,48,48,128,1,float16,fp8,63,0.2500463962554932
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,48,48,128,1,float16,float16,127,0.21528000831604005
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,48,48,128,1,float16,fp8,127,0.26500959396362306
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,48,48,128,1,float16,float16,255,0.3173280000686646
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,48,48,128,1,float16,fp8,255,0.33492801189422605
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,48,48,128,1,float16,float16,511,0.5868591785430908
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,48,48,128,1,float16,fp8,511,0.5068848133087158
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,48,48,128,1,float16,float16,1,0.36960160732269287
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,48,48,128,1,float16,fp8,1,0.45216641426086424
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,48,48,128,1,float16,float16,3,0.3691807985305786
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,48,48,128,1,float16,float16,7,0.37291200160980226
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,48,48,128,1,float16,fp8,3,0.4576672077178955
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,48,48,128,1,float16,fp8,1023,0.8027279853820801
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,48,48,128,1,float16,float16,1023,1.074619197845459
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,48,48,128,1,float16,float16,15,0.38619840145111084
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,48,48,128,1,float16,fp8,7,0.4556687831878662
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,48,48,128,1,float16,fp8,15,0.44948158264160154
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,48,48,128,1,float16,float16,31,0.3886064052581787
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,48,48,128,1,float16,fp8,31,0.48190879821777344
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,48,48,128,1,float16,float16,63,0.3923248052597046
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,48,48,128,1,float16,fp8,127,0.5094128131866456
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,48,48,128,1,float16,fp8,63,0.48105759620666505
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,48,48,128,1,float16,float16,127,0.4296112060546875
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,48,48,128,1,float16,float16,255,0.6008912086486816
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,48,48,128,1,float16,float16,1,0.7322319984436035
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,48,48,128,1,float16,fp8,255,0.643616008758545
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,48,48,128,1,float16,fp8,1,0.8822319984436036
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,48,48,128,1,float16,float16,3,0.7399472236633301
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,48,48,128,1,float16,fp8,3,0.8782159805297851
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,48,48,128,1,float16,float16,7,0.7488143920898438
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,48,48,128,1,float16,fp8,7,0.8712351799011231
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,48,48,128,1,float16,float16,15,0.7508959770202637
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,48,48,128,1,float16,fp8,15,0.9294336318969727
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,48,48,128,1,float16,float16,31,0.749835205078125
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,48,48,128,1,float16,float16,1,0.028372800350189208
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,48,48,128,1,float16,fp8,31,0.9225328445434571
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,48,48,128,1,float16,float16,63,0.7564191818237305
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,48,48,128,1,float16,fp8,1,0.030641600489616394
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,48,48,128,1,float16,float16,3,0.028353598713874818
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,48,48,128,1,float16,fp8,63,0.9366144180297852
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,48,48,128,1,float16,fp8,3,0.03081279993057251
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,48,48,128,1,float16,float16,7,0.028126400709152222
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,48,48,128,1,float16,float16,127,0.790388822555542
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,48,48,128,1,float16,float16,15,0.028217598795890808
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,48,48,128,1,float16,fp8,7,0.030689600110054015
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,48,48,128,1,float16,fp8,63,0.030635198950767516
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,48,48,128,1,float16,fp8,127,0.9883616447448731
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,48,48,128,1,float16,fp8,15,0.030313599109649658
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,48,48,128,1,float16,float16,31,0.02824319899082184
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,48,48,128,1,float16,fp8,255,0.03665440082550049
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,48,48,128,1,float16,fp8,31,0.030633598566055298
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,48,48,128,1,float16,float16,63,0.02825759947299957
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,48,48,128,1,float16,float16,127,0.028540799021720888
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,48,48,128,1,float16,fp8,127,0.030649599432945252
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,48,48,128,1,float16,float16,255,0.04400480091571808
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,48,48,128,1,float16,float16,511,0.05973759889602661
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,48,48,128,1,float16,fp8,511,0.05416479706764221
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,48,48,128,1,float16,float16,1023,0.09463520050048828
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,48,48,128,1,float16,fp8,1023,0.07670720219612122
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,48,48,128,1,float16,float16,2047,0.15652159452438355
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,48,48,128,1,float16,fp8,2047,0.11220959424972535
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,48,48,128,1,float16,float16,1,1.4712688446044921
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,48,48,128,1,float16,float16,3,1.4720879554748536
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,48,48,128,1,float16,fp8,1,1.7846672058105468
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,48,48,128,1,float16,float16,7,1.4726575851440429
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,48,48,128,1,float16,fp8,3,1.8091264724731446
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,48,48,128,1,float16,float16,15,1.4737248420715332
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,48,48,128,1,float16,fp8,7,1.8008495330810548
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,48,48,128,1,float16,fp8,15,1.8283695220947265
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,48,48,128,1,float16,float16,31,1.4743328094482422
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,48,48,128,1,float16,fp8,31,1.8116512298583984
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,48,48,128,1,float16,float16,63,1.485591983795166
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,48,48,128,1,float16,fp8,63,1.797652816772461
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,48,48,128,1,float16,float16,1,2.9129007339477537
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,48,48,128,1,float16,fp8,1,3.581145477294922
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,48,48,128,1,float16,float16,3,2.8760223388671875
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,48,48,128,1,float16,float16,7,2.8867071151733397
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,48,48,128,1,float16,float16,1,0.04025759994983673
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,48,48,128,1,float16,fp8,1,0.04329760074615478
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,48,48,128,1,float16,fp8,3,3.5642673492431642
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,48,48,128,1,float16,float16,3,0.04046719968318939
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,48,48,128,1,float16,float16,15,2.862838363647461
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,48,48,128,1,float16,fp8,3,0.043275201320648195
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,48,48,128,1,float16,float16,7,0.03880319893360138
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,48,48,128,1,float16,fp8,7,0.044833600521087646
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,48,48,128,1,float16,fp8,7,3.635478210449219
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,48,48,128,1,float16,float16,15,0.038966399431228635
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,48,48,128,1,float16,fp8,15,0.043305599689483644
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,48,48,128,1,float16,float16,31,0.038924801349639895
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,48,48,128,1,float16,fp8,31,0.044747200608253476
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,48,48,128,1,float16,float16,63,0.039027199149131775
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,48,48,128,1,float16,fp8,63,0.043188801407814024
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,48,48,128,1,float16,float16,127,0.04719040095806122
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,48,48,128,1,float16,fp8,15,3.5400672912597657
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,48,48,128,1,float16,fp8,127,0.04349760115146637
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,48,48,128,1,float16,float16,31,2.9198175430297852
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,48,48,128,1,float16,float16,255,0.06589599847793579
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,48,48,128,1,float16,fp8,255,0.0622655987739563
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,48,48,128,1,float16,float16,511,0.09565280079841613
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,48,48,128,1,float16,fp8,511,0.08517919778823853
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,48,48,128,1,float16,float16,1023,0.15936319828033446
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,48,48,128,1,float16,fp8,31,3.5596336364746093
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,48,48,128,1,float16,fp8,1023,0.12479679584503174
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,48,48,128,1,float16,float16,2047,0.28475840091705323
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,40,40,128,1,float16,float16,1,0.08975840210914612
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,40,40,128,1,float16,fp8,7,0.10599679946899414
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,48,48,128,1,float16,fp8,2047,0.19738399982452393
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,40,40,128,1,float16,fp8,1,0.10710560083389283
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,40,40,128,1,float16,float16,3,0.09039520025253296
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,40,40,128,1,float16,fp8,3,0.10529600381851197
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,40,40,128,1,float16,float16,7,0.09052960276603698
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,40,40,128,1,float16,fp8,63,0.11476000547409057
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,40,40,128,1,float16,float16,15,0.08964639902114868
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,40,40,128,1,float16,fp8,15,0.10548319816589355
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,40,40,128,1,float16,float16,31,0.09148960113525391
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,40,40,128,1,float16,fp8,31,0.10983040332794189
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,40,40,128,1,float16,float16,63,0.09979199767112731
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,40,40,128,1,float16,float16,127,0.10211520195007324
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,40,40,128,1,float16,fp8,127,0.1227455973625183
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,40,40,128,1,float16,float16,255,0.15169440507888793
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,40,40,128,1,float16,fp8,255,0.15920159816741944
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,40,40,128,1,float16,float16,511,0.25760641098022463
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,40,40,128,1,float16,fp8,511,0.23066880702972412
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,40,40,128,1,float16,float16,1,0.020155200362205507
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,40,40,128,1,float16,float16,3,0.019916799664497376
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,40,40,128,1,float16,float16,1023,0.4653264045715332
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,40,40,128,1,float16,fp8,1023,0.35670878887176516
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,40,40,128,1,float16,fp8,1,0.021246400475502015
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,40,40,128,1,float16,fp8,3,0.021007999777793884
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,40,40,128,1,float16,float16,7,0.020207999646663664
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,40,40,128,1,float16,fp8,7,0.020942400395870208
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,40,40,128,1,float16,float16,2047,0.8851119995117187
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,40,40,128,1,float16,float16,15,0.020446400344371795
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,40,40,128,1,float16,fp8,2047,0.5810048103332519
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,40,40,128,1,float16,fp8,15,0.020904000103473663
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,40,40,128,1,float16,float16,127,0.020268799364566804
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,40,40,128,1,float16,float16,31,0.02048960030078888
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,40,40,128,1,float16,fp8,31,0.02099999934434891
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,40,40,128,1,float16,float16,63,0.02017440050840378
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,40,40,128,1,float16,fp8,63,0.021012799441814424
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,40,40,128,1,float16,fp8,127,0.020955200493335723
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,40,40,128,1,float16,float16,255,0.022126400470733644
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,40,40,128,1,float16,fp8,255,0.022912000119686127
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,40,40,128,1,float16,float16,511,0.024798400700092316
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,40,40,128,1,float16,fp8,511,0.027003198862075806
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,40,40,128,1,float16,float16,1023,0.026070401072502136
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,40,40,128,1,float16,fp8,1023,0.02797279953956604
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,40,40,128,1,float16,float16,2047,0.0358704000711441
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,40,40,128,1,float16,fp8,2047,0.03198719918727875
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,40,40,128,1,float16,float16,1,0.020611199736595153
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,40,40,128,1,float16,fp8,1,0.021171200275421142
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,40,40,128,1,float16,float16,3,0.020283199846744537
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,40,40,128,1,float16,fp8,3,0.021524800360202788
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,40,40,128,1,float16,float16,7,0.020598399639129638
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,40,40,128,1,float16,fp8,7,0.02139039933681488
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,40,40,128,1,float16,float16,15,0.020623999834060668
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,40,40,128,1,float16,fp8,15,0.021486400067806243
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,40,40,128,1,float16,float16,31,0.020790399610996248
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,40,40,128,1,float16,fp8,31,0.021279999613761903
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,40,40,128,1,float16,float16,63,0.02035519927740097
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,40,40,128,1,float16,fp8,63,0.021489599347114564
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,40,40,128,1,float16,float16,127,0.020851199328899384
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,40,40,128,1,float16,fp8,127,0.02173440009355545
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,40,40,128,1,float16,float16,255,0.022484800219535826
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,40,40,128,1,float16,fp8,255,0.0237296000123024
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,40,40,128,1,float16,float16,511,0.025617599487304688
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,40,40,128,1,float16,fp8,511,0.02720640003681183
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,40,40,128,1,float16,float16,1023,0.04099839925765991
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,40,40,128,1,float16,fp8,1023,0.03867680132389069
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,40,40,128,1,float16,float16,2047,0.06182079911231995
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,40,40,128,1,float16,fp8,2047,0.05756639838218689
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,40,40,128,1,float16,float16,1,0.05572479963302612
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,40,40,128,1,float16,fp8,7,0.06360639929771424
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,40,40,128,1,float16,fp8,1,0.06279680132865906
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,40,40,128,1,float16,float16,3,0.05510240197181702
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,40,40,128,1,float16,fp8,3,0.06325439810752868
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,40,40,128,1,float16,float16,7,0.05544480085372925
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,40,40,128,1,float16,float16,15,0.05567359924316406
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,40,40,128,1,float16,fp8,15,0.0637279987335205
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,40,40,128,1,float16,float16,31,0.05569919943809509
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,40,40,128,1,float16,fp8,31,0.06291679739952087
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,40,40,128,1,float16,float16,63,0.06055520176887512
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,40,40,128,1,float16,fp8,255,0.09113600254058837
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,40,40,128,1,float16,fp8,63,0.06332640051841736
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,40,40,128,1,float16,float16,127,0.06512479782104492
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,40,40,128,1,float16,fp8,127,0.0715503990650177
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,40,40,128,1,float16,float16,255,0.09258239865303039
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,40,40,128,1,float16,float16,511,0.1418303966522217
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,40,40,128,1,float16,fp8,511,0.12957600355148316
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,40,40,128,1,float16,float16,1023,0.24794399738311768
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,40,40,128,1,float16,fp8,1023,0.1934448003768921
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,40,40,128,1,float16,float16,1,0.024400000274181367
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,40,40,128,1,float16,fp8,1,0.02577440142631531
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,40,40,128,1,float16,float16,2047,0.45575361251831054
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,40,40,128,1,float16,float16,3,0.024371199309825897
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,40,40,128,1,float16,fp8,2047,0.30974879264831545
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,40,40,128,1,float16,fp8,3,0.026063999533653258
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,40,40,128,1,float16,float16,7,0.024433599412441255
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,40,40,128,1,float16,fp8,7,0.025956800580024718
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,40,40,128,1,float16,float16,15,0.02420479953289032
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,40,40,128,1,float16,fp8,15,0.025697600841522217
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,40,40,128,1,float16,float16,31,0.024451200664043427
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,40,40,128,1,float16,fp8,31,0.025868800282478333
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,40,40,128,1,float16,float16,63,0.024644799530506134
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,40,40,128,1,float16,fp8,63,0.026023998856544495
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,40,40,128,1,float16,float16,127,0.024622400104999543
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,40,40,128,1,float16,fp8,127,0.026080000400543212
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,40,40,128,1,float16,float16,255,0.02812480032444
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,40,40,128,1,float16,fp8,255,0.029862400889396668
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,40,40,128,1,float16,float16,511,0.039052799344062805
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,40,40,128,1,float16,fp8,511,0.03749119937419891
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,40,40,128,1,float16,float16,1023,0.06122879981994629
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,40,40,128,1,float16,fp8,1023,0.058524799346923825
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,40,40,128,1,float16,float16,2047,0.09512159824371338
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,40,40,128,1,float16,fp8,2047,0.08459519743919372
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,40,40,128,1,float16,float16,1,0.16372480392456054
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,40,40,128,1,float16,fp8,1,0.1976431965827942
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,40,40,128,1,float16,float16,3,0.16592320203781127
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,40,40,128,1,float16,fp8,3,0.19889600276947023
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,40,40,128,1,float16,float16,7,0.16644320487976075
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,40,40,128,1,float16,fp8,7,0.1986016035079956
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,40,40,128,1,float16,float16,15,0.16514240503311156
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,40,40,128,1,float16,fp8,15,0.20046560764312743
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,40,40,128,1,float16,float16,31,0.17318880558013916
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,40,40,128,1,float16,fp8,31,0.1994096040725708
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,40,40,128,1,float16,float16,63,0.17685760259628297
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,40,40,128,1,float16,fp8,63,0.21194078922271728
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,40,40,128,1,float16,fp8,255,0.28466880321502686
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,40,40,128,1,float16,float16,127,0.18502559661865234
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,40,40,128,1,float16,fp8,127,0.22223520278930664
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,40,40,128,1,float16,float16,255,0.2678719997406006
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,40,40,128,1,float16,float16,511,0.4907711982727051
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,40,40,128,1,float16,fp8,511,0.4311200141906738
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,40,40,128,1,float16,float16,1,0.3092144012451172
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,40,40,128,1,float16,fp8,1,0.3816800117492676
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,40,40,128,1,float16,float16,3,0.30712161064147947
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,40,40,128,1,float16,float16,1023,0.8951904296875
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,40,40,128,1,float16,fp8,3,0.38201758861541746
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,40,40,128,1,float16,float16,7,0.3068687915802002
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,40,40,128,1,float16,fp8,1023,0.676470422744751
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,40,40,128,1,float16,float16,15,0.32039680480957033
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,40,40,128,1,float16,fp8,7,0.383076810836792
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,40,40,128,1,float16,fp8,15,0.3804896116256714
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,40,40,128,1,float16,float16,31,0.3279184103012085
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,40,40,128,1,float16,float16,63,0.3302095890045166
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,40,40,128,1,float16,fp8,31,0.40799679756164553
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,40,40,128,1,float16,fp8,63,0.4028207778930664
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,40,40,128,1,float16,float16,127,0.3441888093948364
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,40,40,128,1,float16,fp8,127,0.4312623977661133
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,40,40,128,1,float16,float16,255,0.5122655868530274
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,40,40,128,1,float16,float16,1,0.6024928092956543
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,40,40,128,1,float16,fp8,255,0.541980791091919
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,40,40,128,1,float16,fp8,1,0.7434207916259765
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,40,40,128,1,float16,float16,3,0.607204818725586
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,40,40,128,1,float16,fp8,3,0.7402416229248047
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,40,40,128,1,float16,float16,7,0.6257152080535888
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,40,40,128,1,float16,fp8,7,0.7279871940612793
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,40,40,128,1,float16,float16,15,0.6354080200195312
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,40,40,128,1,float16,fp8,15,0.7540192127227783
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,40,40,128,1,float16,float16,31,0.6309855937957763
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,40,40,128,1,float16,fp8,31,0.782144021987915
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,40,40,128,1,float16,float16,1,0.028112000226974486
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,40,40,128,1,float16,float16,63,0.634603214263916
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,40,40,128,1,float16,fp8,1,0.030313599109649658
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,40,40,128,1,float16,fp8,63,0.7824016094207764
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,40,40,128,1,float16,float16,3,0.02842719852924347
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,40,40,128,1,float16,float16,127,0.6659632205963135
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,40,40,128,1,float16,fp8,3,0.030399999022483824
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,40,40,128,1,float16,float16,7,0.02788960039615631
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,40,40,128,1,float16,fp8,127,0.8405695915222168
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,40,40,128,1,float16,fp8,7,0.030432000756263733
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,40,40,128,1,float16,float16,15,0.028089600801467895
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,40,40,128,1,float16,fp8,15,0.03041279911994934
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,40,40,128,1,float16,float16,31,0.028188800811767577
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,40,40,128,1,float16,fp8,31,0.030302399396896364
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,40,40,128,1,float16,float16,63,0.028188800811767577
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,40,40,128,1,float16,fp8,63,0.030344000458717345
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,40,40,128,1,float16,float16,127,0.028401601314544677
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,40,40,128,1,float16,fp8,127,0.03046880066394806
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,40,40,128,1,float16,float16,1023,0.08485119938850402
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,40,40,128,1,float16,float16,255,0.038211199641227725
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,40,40,128,1,float16,fp8,255,0.03630400002002716
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,40,40,128,1,float16,float16,511,0.055281597375869754
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,40,40,128,1,float16,fp8,511,0.05231519937515259
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,40,40,128,1,float16,fp8,1023,0.07625280022621155
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,40,40,128,1,float16,float16,2047,0.1408560037612915
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,40,40,128,1,float16,fp8,2047,0.114028799533844
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,40,40,128,1,float16,float16,1,1.2310591697692872
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,40,40,128,1,float16,float16,3,1.231563186645508
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,40,40,128,1,float16,fp8,1,1.43788480758667
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,40,40,128,1,float16,float16,7,1.230081558227539
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,40,40,128,1,float16,fp8,3,1.4583071708679198
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,40,40,128,1,float16,float16,15,1.2310511589050293
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,40,40,128,1,float16,fp8,7,1.4931296348571776
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,40,40,128,1,float16,fp8,15,1.5290016174316405
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,40,40,128,1,float16,float16,31,1.2349072456359864
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,40,40,128,1,float16,float16,63,1.2423647880554198
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,40,40,128,1,float16,fp8,31,1.5219903945922852
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,40,40,128,1,float16,fp8,63,1.5084992408752442
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,40,40,128,1,float16,float16,1,2.430507278442383
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,40,40,128,1,float16,fp8,1,2.976367950439453
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,40,40,128,1,float16,float16,3,2.4305583953857424
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,40,40,128,1,float16,float16,7,2.4518720626831056
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,40,40,128,1,float16,float16,1,0.036048001050949095
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,40,40,128,1,float16,fp8,1,0.040057599544525146
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,40,40,128,1,float16,fp8,3,2.9822080612182615
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,40,40,128,1,float16,float16,3,0.03611840009689331
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,40,40,128,1,float16,float16,15,2.3973487854003905
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,40,40,128,1,float16,fp8,3,0.0401775985956192
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,40,40,128,1,float16,float16,7,0.03499360084533691
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,40,40,128,1,float16,fp8,7,0.0386927992105484
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,40,40,128,1,float16,fp8,7,2.9743488311767576
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,40,40,128,1,float16,float16,15,0.03528639972209931
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,40,40,128,1,float16,fp8,15,0.04025599956512451
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,40,40,128,1,float16,float16,31,0.03520320057868957
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,40,40,128,1,float16,fp8,15,2.9661376953125
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,40,40,128,1,float16,fp8,31,0.03892160058021545
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,40,40,128,1,float16,float16,31,2.408352088928223
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,40,40,128,1,float16,float16,255,0.05861279964447021
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,40,40,128,1,float16,float16,63,0.03505760133266449
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,40,40,128,1,float16,fp8,63,0.03901599943637848
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,40,40,128,1,float16,float16,127,0.041649600863456725
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,40,40,128,1,float16,fp8,127,0.038726401329040525
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,40,40,128,1,float16,fp8,255,0.05435199737548828
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,40,40,128,1,float16,fp8,511,0.07391999959945679
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,40,40,128,1,float16,float16,511,0.09122880101203919
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,40,40,128,1,float16,fp8,31,2.933772850036621
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,40,40,128,1,float16,float16,1023,0.13814560174942017
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,40,40,128,1,float16,fp8,1023,0.10816160440444947
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,40,40,128,1,float16,float16,2047,0.24161601066589355
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,32,128,1,float16,float16,1,0.07723039984703065
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,40,40,128,1,float16,fp8,2047,0.16619999408721925
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,32,128,1,float16,fp8,1,0.08695359826087952
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,32,128,1,float16,float16,3,0.07591840028762817
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,32,128,1,float16,fp8,3,0.08764320015907287
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,32,128,1,float16,float16,7,0.07536640167236328
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,32,128,1,float16,fp8,7,0.088019198179245
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,32,128,1,float16,float16,15,0.07663040161132813
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,32,128,1,float16,fp8,15,0.08961600065231323
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,32,128,1,float16,float16,31,0.07562400102615356
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,32,128,1,float16,fp8,31,0.08718079924583436
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,32,128,1,float16,float16,63,0.08404639959335328
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,32,128,1,float16,fp8,63,0.09253119826316833
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,32,128,1,float16,float16,127,0.08648959994316101
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,32,128,1,float16,fp8,127,0.10234559774398803
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,32,128,1,float16,float16,255,0.12820639610290527
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,32,128,1,float16,fp8,255,0.12982399463653566
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,32,128,1,float16,float16,511,0.21159520149230956
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,32,128,1,float16,fp8,511,0.18865439891815186
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,32,128,1,float16,float16,1023,0.3779360055923462
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,32,128,1,float16,fp8,1023,0.28595359325408937
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,32,128,1,float16,float16,1,0.020284800231456755
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,32,128,1,float16,fp8,1,0.020508800446987153
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,32,128,1,float16,float16,2047,0.7140799999237061
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,32,128,1,float16,fp8,2047,0.4714352130889893
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,32,128,1,float16,float16,3,0.020347200334072113
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,32,128,1,float16,fp8,3,0.020390400290489198
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,32,128,1,float16,float16,15,0.019809600710868836
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,32,128,1,float16,float16,7,0.020113599300384522
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,32,128,1,float16,fp8,7,0.02085919976234436
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,32,128,1,float16,fp8,15,0.021113599836826324
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,32,128,1,float16,fp8,4095,0.8722000122070312
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,32,128,1,float16,float16,31,0.019900800287723543
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,32,128,1,float16,fp8,31,0.020942400395870208
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,32,128,1,float16,float16,4095,1.4005104064941407
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,32,128,1,float16,float16,63,0.02040479928255081
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,32,128,1,float16,fp8,63,0.020718400180339814
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,32,128,1,float16,float16,127,0.020556800067424774
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,32,128,1,float16,float16,255,0.02158239930868149
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,32,128,1,float16,fp8,127,0.02104640007019043
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,32,128,1,float16,fp8,255,0.02311519980430603
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,32,128,1,float16,float16,511,0.025099200010299683
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,32,128,1,float16,fp8,511,0.027225598692893982
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,32,128,1,float16,float16,1023,0.025950399041175843
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,32,128,1,float16,fp8,1023,0.026470398902893065
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,32,128,1,float16,float16,2047,0.030665600299835206
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,32,128,1,float16,fp8,2047,0.030339199304580688
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,32,128,1,float16,float16,4095,0.047628799080848695
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,32,128,1,float16,fp8,4095,0.037390398979187014
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,32,128,1,float16,float16,1,0.020552000403404234
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,32,128,1,float16,fp8,1,0.021241599321365358
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,32,128,1,float16,float16,15,0.0204815998673439
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,32,128,1,float16,float16,3,0.020545600354671477
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,32,128,1,float16,fp8,3,0.021063999831676485
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,32,128,1,float16,float16,7,0.020417599380016326
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,32,128,1,float16,fp8,7,0.021454399824142455
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,32,128,1,float16,fp8,15,0.021532799303531646
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,32,128,1,float16,float16,31,0.020360000431537628
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,32,128,1,float16,fp8,31,0.02125120013952255
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,32,128,1,float16,float16,63,0.02078240066766739
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,32,128,1,float16,fp8,63,0.021273599565029146
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,32,128,1,float16,float16,127,0.020580799877643587
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,32,128,1,float16,fp8,127,0.02163040041923523
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,32,128,1,float16,float16,255,0.022380800545215608
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,32,128,1,float16,fp8,255,0.023371200263500213
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,32,128,1,float16,float16,511,0.025382399559020996
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,32,128,1,float16,fp8,511,0.02731359899044037
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,32,128,1,float16,float16,1023,0.03170560002326965
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,32,128,1,float16,fp8,1023,0.03041439950466156
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,32,128,1,float16,float16,2047,0.04773440062999725
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,32,128,1,float16,fp8,2047,0.038736000657081604
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,32,128,1,float16,float16,4095,0.06970400214195252
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,32,128,1,float16,fp8,4095,0.05618559718132019
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,32,128,1,float16,float16,1,0.04842239916324616
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,32,128,1,float16,fp8,1,0.05386719703674316
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,32,128,1,float16,fp8,15,0.05446239709854126
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,32,128,1,float16,float16,3,0.04780319929122925
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,32,128,1,float16,fp8,3,0.05472800135612488
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,32,128,1,float16,float16,7,0.04841760098934174
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,32,128,1,float16,fp8,7,0.05396800041198731
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,32,128,1,float16,float16,15,0.047737601399421695
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,32,128,1,float16,float16,31,0.04855999946594238
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,32,128,1,float16,fp8,31,0.05466560125350952
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,32,128,1,float16,float16,63,0.05066239833831787
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,32,128,1,float16,fp8,63,0.05411999821662903
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,32,128,1,float16,float16,127,0.05715360045433045
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,32,128,1,float16,fp8,127,0.056220799684524536
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,32,128,1,float16,float16,255,0.0797648012638092
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,32,128,1,float16,fp8,255,0.07708160281181335
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,32,128,1,float16,float16,511,0.11934399604797363
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,32,128,1,float16,fp8,511,0.10851680040359497
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,32,128,1,float16,float16,1023,0.2051055908203125
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,32,128,1,float16,fp8,1023,0.16002880334854125
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,32,128,1,float16,float16,2047,0.3711215972900391
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,32,128,1,float16,fp8,2047,0.2540496110916138
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,32,128,1,float16,float16,1,0.021065600216388702
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,32,128,1,float16,fp8,1,0.021971200406551362
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,32,128,1,float16,float16,3,0.020982399582862854
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,32,128,1,float16,fp8,3,0.0220320001244545
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,32,128,1,float16,float16,4095,0.7079247951507568
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,32,128,1,float16,fp8,4095,0.45215520858764646
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,32,128,1,float16,float16,7,0.02088479995727539
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,32,128,1,float16,fp8,7,0.021844799816608428
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,32,128,1,float16,float16,15,0.02114560008049011
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,32,128,1,float16,fp8,15,0.02188960015773773
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,32,128,1,float16,float16,31,0.021036800742149354
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,32,128,1,float16,fp8,31,0.022015999257564544
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,32,128,1,float16,float16,63,0.02088160067796707
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,32,128,1,float16,fp8,63,0.02199999988079071
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,32,128,1,float16,float16,127,0.021294400095939636
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,32,128,1,float16,fp8,127,0.021985599398612977
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,32,128,1,float16,float16,255,0.02292799949645996
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,32,128,1,float16,fp8,255,0.02423200011253357
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,32,128,1,float16,float16,511,0.028951999545097352
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,32,128,1,float16,fp8,511,0.027928000688552855
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,32,128,1,float16,float16,1023,0.047753599286079404
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,32,128,1,float16,fp8,1023,0.0361871987581253
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,32,128,1,float16,float16,2047,0.06914079785346985
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,32,128,1,float16,fp8,2047,0.05595679879188538
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,32,128,1,float16,float16,4095,0.11304160356521606
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,32,128,1,float16,fp8,4095,0.08149120211601257
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,32,128,1,float16,float16,1,0.13528319597244262
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,32,128,1,float16,fp8,1,0.1642799973487854
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,32,128,1,float16,fp8,15,0.16516159772872924
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,32,128,1,float16,float16,15,0.13811520338058472
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,32,128,1,float16,float16,3,0.13478879928588866
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,32,128,1,float16,fp8,3,0.16407999992370606
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,32,128,1,float16,float16,7,0.13581119775772094
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,32,128,1,float16,fp8,7,0.16441760063171387
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,32,128,1,float16,float16,31,0.14330400228500367
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,32,128,1,float16,fp8,31,0.16634080410003663
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,32,128,1,float16,float16,63,0.14765759706497192
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,32,128,1,float16,fp8,63,0.17680959701538085
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,32,128,1,float16,float16,127,0.15185920000076295
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,32,128,1,float16,fp8,127,0.1873487949371338
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,32,128,1,float16,float16,255,0.2230992078781128
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,32,128,1,float16,fp8,255,0.2335968017578125
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,32,128,1,float16,float16,511,0.4060704231262207
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,32,128,1,float16,fp8,511,0.3534111976623535
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,32,128,1,float16,float16,1,0.25308799743652344
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,32,128,1,float16,fp8,1023,0.554633617401123
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,32,128,1,float16,float16,1023,0.7278223991394043
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,32,128,1,float16,fp8,1,0.3076719999313354
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,32,128,1,float16,float16,3,0.2520431995391846
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,32,128,1,float16,fp8,3,0.31076641082763673
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,32,128,1,float16,float16,7,0.2523535966873169
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,32,128,1,float16,float16,15,0.2567392110824585
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,32,128,1,float16,fp8,7,0.3073472023010254
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,32,128,1,float16,float16,2047,1.397646427154541
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,32,128,1,float16,float16,31,0.2699872016906738
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,32,128,1,float16,fp8,2047,0.9212063789367676
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,32,128,1,float16,fp8,15,0.30797119140625
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,32,128,1,float16,fp8,31,0.3207711935043335
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,32,128,1,float16,float16,63,0.2701647996902466
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,32,128,1,float16,fp8,63,0.32791039943695066
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,32,128,1,float16,float16,127,0.30227999687194823
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,32,128,1,float16,fp8,127,0.35144639015197754
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,32,128,1,float16,float16,255,0.4156623840332031
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,32,128,1,float16,fp8,255,0.44101600646972655
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,32,128,1,float16,float16,1,0.480295991897583
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,32,128,1,float16,fp8,1,0.5913599967956543
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,32,128,1,float16,float16,3,0.48764958381652834
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,32,128,1,float16,float16,511,0.7905360221862793
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,32,128,1,float16,fp8,511,0.6677567958831787
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,32,128,1,float16,fp8,3,0.5974864006042481
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,32,128,1,float16,float16,7,0.4992112159729004
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,32,128,1,float16,fp8,7,0.6001408100128174
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,32,128,1,float16,float16,15,0.5096223831176758
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,32,128,1,float16,fp8,15,0.5994607925415039
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,32,128,1,float16,float16,31,0.5137743949890137
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,32,128,1,float16,fp8,31,0.6295423984527588
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,32,128,1,float16,float16,63,0.51387038230896
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,32,128,1,float16,fp8,63,0.6354640007019043
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,32,128,1,float16,float16,127,0.539847993850708
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,32,128,1,float16,float16,1,0.024707199633121492
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,32,128,1,float16,fp8,1,0.026144000887870788
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,32,128,1,float16,fp8,127,0.6654191970825195
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,32,128,1,float16,float16,3,0.024483199417591094
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,32,128,1,float16,fp8,3,0.02633439898490906
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,32,128,1,float16,float16,255,0.7923503875732422
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,32,128,1,float16,float16,7,0.024596799910068513
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,32,128,1,float16,fp8,7,0.026100799441337585
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,32,128,1,float16,fp8,255,0.8488960266113281
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,32,128,1,float16,float16,15,0.024854399263858795
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,32,128,1,float16,fp8,15,0.02648639976978302
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,32,128,1,float16,float16,31,0.024638399481773376
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,32,128,1,float16,fp8,31,0.02601439952850342
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,32,128,1,float16,float16,63,0.02449759989976883
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,32,128,1,float16,fp8,63,0.026121601462364197
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,32,128,1,float16,float16,127,0.024907200038433074
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,32,128,1,float16,fp8,127,0.025992000102996828
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,32,128,1,float16,float16,255,0.03144159913063049
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,32,128,1,float16,fp8,255,0.030267199873924254
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,32,128,1,float16,float16,511,0.04695200026035309
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,32,128,1,float16,fp8,511,0.04021599888801575
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,32,128,1,float16,float16,1023,0.07081279754638672
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,32,128,1,float16,fp8,1023,0.05999839901924133
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,32,128,1,float16,float16,2047,0.11367360353469849
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,32,128,1,float16,fp8,2047,0.08374239802360535
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,32,128,1,float16,float16,4095,0.19772640466690064
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,32,128,1,float16,fp8,4095,0.13513280153274537
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,32,128,1,float16,float16,1,0.9929823875427246
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,32,128,1,float16,fp8,1,1.1449007987976074
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,32,128,1,float16,float16,3,0.9905599594116211
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,32,128,1,float16,fp8,3,1.1520272254943849
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,32,128,1,float16,float16,7,0.9925135612487793
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,32,128,1,float16,fp8,7,1.2001824378967285
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,32,128,1,float16,float16,15,0.9929840087890625
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,32,128,1,float16,fp8,15,1.230440044403076
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,32,128,1,float16,float16,31,0.9967967987060546
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,32,128,1,float16,float16,63,1.0016400337219238
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,32,128,1,float16,fp8,31,1.224078369140625
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,32,128,1,float16,fp8,63,1.2310848236083984
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,32,128,1,float16,float16,127,1.0453007698059082
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,32,128,1,float16,fp8,127,1.3044384002685547
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,32,128,1,float16,float16,1,1.9485343933105468
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,32,128,1,float16,float16,3,1.95009765625
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,32,128,1,float16,float16,7,1.9293392181396485
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,32,128,1,float16,fp8,1,2.4314687728881834
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,32,128,1,float16,fp8,3,2.400624084472656
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,32,128,1,float16,float16,15,1.950833511352539
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,32,128,1,float16,fp8,7,2.375886344909668
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,32,128,1,float16,float16,1,0.03166080117225647
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,32,128,1,float16,fp8,1,0.03513439893722534
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,32,128,1,float16,float16,3,0.0314848005771637
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,32,128,1,float16,fp8,3,0.03516480028629303
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,32,128,1,float16,float16,7,0.03162240087985992
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,32,128,1,float16,float16,31,1.9461999893188477
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,32,128,1,float16,fp8,7,0.03534240126609802
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,32,128,1,float16,fp8,15,2.3666271209716796
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,32,128,1,float16,float16,15,0.031681600213050845
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,32,128,1,float16,float16,31,0.03204320073127746
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,32,128,1,float16,fp8,15,0.03454239964485169
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,32,128,1,float16,fp8,31,0.034355199337005614
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,32,128,1,float16,float16,63,0.03156160116195679
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,32,128,1,float16,fp8,63,0.03466399908065796
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,32,128,1,float16,float16,63,1.94486083984375
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,32,128,1,float16,float16,127,0.03356800079345703
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,32,128,1,float16,fp8,31,2.3826976776123048
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,32,128,1,float16,fp8,127,0.03445279896259308
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,32,128,1,float16,float16,255,0.05101760029792786
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,32,128,1,float16,fp8,255,0.04503679871559143
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,32,128,1,float16,fp8,63,2.3497407913208006
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,32,128,1,float16,float16,511,0.0782047986984253
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,32,128,1,float16,fp8,511,0.06419839859008789
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,32,128,1,float16,float16,1023,0.11699039936065674
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,32,128,1,float16,fp8,1023,0.09157599806785584
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,32,128,1,float16,float16,2047,0.20005440711975098
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,32,128,1,float16,fp8,2047,0.13865599632263184
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,24,24,128,1,float16,float16,1,0.06140480041503906
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,32,128,1,float16,float16,4095,0.36716160774230955
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,32,128,1,float16,fp8,4095,0.23790559768676758
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,24,24,128,1,float16,fp8,1,0.07088320255279541
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,24,24,128,1,float16,float16,3,0.06129760146141052
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,24,24,128,1,float16,fp8,3,0.06966879963874817
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,24,24,128,1,float16,float16,7,0.06254079937934875
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,24,24,128,1,float16,fp8,7,0.07114880084991455
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,24,24,128,1,float16,float16,15,0.06155040264129639
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,24,24,128,1,float16,fp8,15,0.07060800194740295
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,24,24,128,1,float16,float16,31,0.06155359745025635
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,24,24,128,1,float16,fp8,31,0.07045599818229675
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,24,24,128,1,float16,float16,63,0.06804959774017334
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,24,24,128,1,float16,fp8,63,0.07272480130195617
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,24,24,128,1,float16,float16,127,0.07387999892234802
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,24,24,128,1,float16,fp8,127,0.08202559947967529
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,24,24,128,1,float16,float16,255,0.10774879455566407
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,24,24,128,1,float16,fp8,255,0.10287359952926636
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,24,24,128,1,float16,float16,511,0.16571680307388306
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,24,24,128,1,float16,fp8,511,0.14815520048141478
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,24,24,128,1,float16,float16,1023,0.29418880939483644
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,24,24,128,1,float16,fp8,1023,0.2245232105255127
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,24,24,128,1,float16,float16,1,0.01974879950284958
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,24,24,128,1,float16,fp8,2047,0.36910560131073
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,24,24,128,1,float16,float16,2047,0.544001579284668
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,24,24,128,1,float16,fp8,1,0.020764799416065217
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,24,24,128,1,float16,float16,3,0.020179200172424316
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,24,24,128,1,float16,fp8,3,0.02127680033445358
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,24,24,128,1,float16,float16,7,0.019763199985027312
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,24,24,128,1,float16,fp8,7,0.02123039960861206
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,24,24,128,1,float16,float16,15,0.02032800018787384
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,24,24,128,1,float16,fp8,15,0.020729599893093108
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,24,24,128,1,float16,float16,4095,1.05523681640625
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,24,24,128,1,float16,fp8,4095,0.6618303775787353
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,24,24,128,1,float16,float16,31,0.020024000108242034
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,24,24,128,1,float16,float16,255,0.021827200055122377
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,24,24,128,1,float16,fp8,31,0.02141759991645813
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,24,24,128,1,float16,float16,63,0.020073600113391876
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,24,24,128,1,float16,fp8,63,0.021190400421619415
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,24,24,128,1,float16,float16,127,0.020257599651813507
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,24,24,128,1,float16,fp8,127,0.02093600034713745
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,24,24,128,1,float16,fp8,255,0.022731199860572815
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,24,24,128,1,float16,float16,511,0.024977600574493407
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,24,24,128,1,float16,fp8,511,0.02650879919528961
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,24,24,128,1,float16,float16,1023,0.02465600073337555
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,24,24,128,1,float16,fp8,1023,0.025942400097846985
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,24,24,128,1,float16,float16,2047,0.02800000011920929
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,24,24,128,1,float16,fp8,2047,0.028140801191329955
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,24,24,128,1,float16,float16,4095,0.04175359904766083
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,24,24,128,1,float16,fp8,4095,0.034406399726867674
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,24,24,128,1,float16,float16,1,0.020596800744533537
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,24,24,128,1,float16,fp8,1,0.021195200085639954
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,24,24,128,1,float16,float16,3,0.020619200170040132
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,24,24,128,1,float16,fp8,3,0.021060800552368163
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,24,24,128,1,float16,float16,7,0.020420800149440765
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,24,24,128,1,float16,fp8,7,0.021406400203704833
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,24,24,128,1,float16,float16,15,0.02041600048542023
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,24,24,128,1,float16,fp8,15,0.021715199947357176
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,24,24,128,1,float16,float16,31,0.02062560021877289
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,24,24,128,1,float16,fp8,31,0.020982399582862854
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,24,24,128,1,float16,float16,63,0.02064639925956726
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,24,24,128,1,float16,fp8,63,0.021211199462413788
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,24,24,128,1,float16,float16,127,0.02072799950838089
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,24,24,128,1,float16,fp8,127,0.021436800062656403
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,24,24,128,1,float16,fp8,1023,0.03272959887981415
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,24,24,128,1,float16,float16,2047,0.05008959770202637
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,24,24,128,1,float16,float16,255,0.022649599611759184
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,24,24,128,1,float16,fp8,255,0.023398399353027344
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,24,24,128,1,float16,float16,511,0.02550239861011505
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,24,24,128,1,float16,fp8,511,0.026550400257110595
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,24,24,128,1,float16,float16,1023,0.02818560004234314
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,24,24,128,1,float16,fp8,2047,0.040427199006080626
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,24,24,128,1,float16,float16,4095,0.07260479927062988
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,24,24,128,1,float16,fp8,4095,0.06730239987373351
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,24,24,128,1,float16,float16,1,0.040966400504112245
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,24,24,128,1,float16,fp8,1,0.04539520144462585
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,24,24,128,1,float16,float16,3,0.04038560092449188
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,24,24,128,1,float16,fp8,3,0.044772800803184507
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,24,24,128,1,float16,float16,7,0.0406495988368988
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,24,24,128,1,float16,fp8,7,0.04497120082378388
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,24,24,128,1,float16,float16,15,0.04039359986782074
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,24,24,128,1,float16,fp8,15,0.04538399875164032
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,24,24,128,1,float16,float16,31,0.04081439971923828
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,24,24,128,1,float16,fp8,31,0.04503999948501587
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,24,24,128,1,float16,float16,63,0.04047519862651825
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,24,24,128,1,float16,fp8,63,0.045311999320983884
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,24,24,128,1,float16,float16,127,0.048502400517463684
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,24,24,128,1,float16,fp8,127,0.045603200793266296
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,24,24,128,1,float16,float16,255,0.06701120138168334
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,24,24,128,1,float16,fp8,255,0.06435679793357849
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,24,24,128,1,float16,float16,511,0.09623519778251648
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,24,24,128,1,float16,fp8,511,0.08690720200538635
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,24,24,128,1,float16,float16,1023,0.16210880279541015
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,24,24,128,1,float16,fp8,1023,0.12735199928283691
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,24,24,128,1,float16,float16,2047,0.287992000579834
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,24,24,128,1,float16,fp8,2047,0.19890880584716797
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,24,24,128,1,float16,float16,1,0.020619200170040132
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,24,24,128,1,float16,fp8,1,0.021803200244903564
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,24,24,128,1,float16,float16,3,0.02069759964942932
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,24,24,128,1,float16,float16,4095,0.5407104015350341
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,24,24,128,1,float16,float16,15,0.020839999616146087
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,24,24,128,1,float16,fp8,4095,0.34889280796051025
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,24,24,128,1,float16,fp8,3,0.021515199542045595
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,24,24,128,1,float16,float16,7,0.020870399475097657
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,24,24,128,1,float16,fp8,7,0.021401600539684297
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,24,24,128,1,float16,fp8,15,0.021585600078105928
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,24,24,128,1,float16,fp8,31,0.021777600049972534
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,24,24,128,1,float16,float16,31,0.020785599946975708
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,24,24,128,1,float16,float16,63,0.020654399693012238
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,24,24,128,1,float16,fp8,63,0.021817600727081297
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,24,24,128,1,float16,float16,127,0.02093919962644577
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,24,24,128,1,float16,fp8,127,0.021345600485801697
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,24,24,128,1,float16,float16,255,0.022487999498844148
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,24,24,128,1,float16,fp8,255,0.023771199584007262
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,24,24,128,1,float16,float16,2047,0.06256960034370422
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,24,24,128,1,float16,float16,511,0.025737598538398743
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,24,24,128,1,float16,fp8,511,0.027430400252342224
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,24,24,128,1,float16,float16,1023,0.044854399561882016
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,24,24,128,1,float16,fp8,1023,0.03922719955444336
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,24,24,128,1,float16,fp8,2047,0.05889599919319153
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,24,24,128,1,float16,float16,4095,0.0978767991065979
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,24,24,128,1,float16,fp8,4095,0.08450239896774292
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,24,24,128,1,float16,float16,1,0.10656960010528564
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,24,24,128,1,float16,fp8,1,0.12945280075073243
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,24,24,128,1,float16,float16,3,0.10743199586868286
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,24,24,128,1,float16,fp8,3,0.12809280157089234
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,24,24,128,1,float16,float16,7,0.1071727991104126
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,24,24,128,1,float16,fp8,7,0.12873920202255248
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,24,24,128,1,float16,float16,15,0.1090880036354065
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,24,24,128,1,float16,fp8,15,0.12863999605178833
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,24,24,128,1,float16,float16,31,0.11018400192260742
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,24,24,128,1,float16,fp8,31,0.12968000173568725
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,24,24,128,1,float16,float16,63,0.11580799818038941
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,24,24,128,1,float16,fp8,63,0.13785920143127442
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,24,24,128,1,float16,float16,127,0.12252000570297242
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,24,24,128,1,float16,fp8,127,0.14709279537200928
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,24,24,128,1,float16,float16,255,0.1781615972518921
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,24,24,128,1,float16,fp8,255,0.18352479934692384
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,24,24,128,1,float16,float16,511,0.3115504026412964
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,24,24,128,1,float16,fp8,511,0.2750191926956177
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,24,24,128,1,float16,float16,1023,0.5534687995910644
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,24,24,128,1,float16,fp8,1023,0.4247600078582764
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,24,24,128,1,float16,float16,1,0.19443199634552003
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,24,24,128,1,float16,fp8,3,0.23621439933776855
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,24,24,128,1,float16,fp8,1,0.2359407901763916
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,24,24,128,1,float16,float16,3,0.19325759410858154
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,24,24,128,1,float16,float16,7,0.1949504017829895
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,24,24,128,1,float16,fp8,7,0.23483359813690186
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,24,24,128,1,float16,float16,15,0.19528000354766845
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,24,24,128,1,float16,float16,2047,1.059217643737793
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,24,24,128,1,float16,fp8,2047,0.7037471771240235
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,24,24,128,1,float16,fp8,15,0.23645598888397218
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,24,24,128,1,float16,float16,31,0.2060960054397583
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,24,24,128,1,float16,fp8,31,0.23906879425048827
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,24,24,128,1,float16,float16,63,0.20814878940582277
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,24,24,128,1,float16,fp8,63,0.25057759284973147
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,24,24,128,1,float16,float16,127,0.2183919906616211
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,24,24,128,1,float16,fp8,127,0.2757567882537842
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,24,24,128,1,float16,float16,255,0.3216415882110596
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,24,24,128,1,float16,fp8,255,0.3359008073806763
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,24,24,128,1,float16,float16,1,0.36770238876342776
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,24,24,128,1,float16,float16,511,0.6025712013244628
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,24,24,128,1,float16,fp8,1,0.44613118171691896
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,24,24,128,1,float16,fp8,511,0.5059951782226563
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,24,24,128,1,float16,float16,3,0.3711999893188477
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,24,24,128,1,float16,fp8,3,0.4486368179321289
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,24,24,128,1,float16,float16,7,0.36972320079803467
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,24,24,128,1,float16,fp8,7,0.44768319129943845
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,24,24,128,1,float16,float16,15,0.3871392011642456
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,24,24,128,1,float16,fp8,15,0.4487296104431152
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,24,24,128,1,float16,float16,31,0.38904800415039065
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,24,24,128,1,float16,fp8,31,0.4784912109375
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,24,24,128,1,float16,float16,63,0.39733119010925294
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,24,24,128,1,float16,fp8,63,0.47554240226745603
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,24,24,128,1,float16,float16,127,0.41187200546264646
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,24,24,128,1,float16,float16,1,0.024406400322914124
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,24,24,128,1,float16,fp8,127,0.5101200103759765
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,24,24,128,1,float16,fp8,1,0.025897601246833803
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,24,24,128,1,float16,float16,255,0.6125552177429199
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,24,24,128,1,float16,fp8,7,0.026030400395393373
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,24,24,128,1,float16,float16,3,0.02454400062561035
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,24,24,128,1,float16,fp8,255,0.6408736228942871
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,24,24,128,1,float16,fp8,3,0.025990399718284606
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,24,24,128,1,float16,float16,7,0.024369600415229797
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,24,24,128,1,float16,float16,15,0.024323199689388276
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,24,24,128,1,float16,float16,31,0.02441920042037964
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,24,24,128,1,float16,fp8,15,0.025856000185012818
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,24,24,128,1,float16,fp8,31,0.026144000887870788
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,24,24,128,1,float16,float16,63,0.02459519952535629
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,24,24,128,1,float16,fp8,63,0.02610880136489868
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,24,24,128,1,float16,float16,127,0.024323199689388276
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,24,24,128,1,float16,fp8,127,0.02608320116996765
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,24,24,128,1,float16,float16,1023,0.06158400177955627
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,24,24,128,1,float16,float16,255,0.02823199927806854
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,24,24,128,1,float16,fp8,255,0.030019199848175047
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,24,24,128,1,float16,float16,511,0.04231359958648682
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,24,24,128,1,float16,fp8,511,0.0369951993227005
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,24,24,128,1,float16,fp8,1023,0.059443199634552
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,24,24,128,1,float16,float16,2047,0.09584959745407104
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,24,24,128,1,float16,fp8,2047,0.08431199789047242
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,24,24,128,1,float16,float16,4095,0.164628803730011
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,24,24,128,1,float16,fp8,4095,0.13665119409561158
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,24,24,128,1,float16,float16,1,0.7436880111694336
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,24,24,128,1,float16,fp8,1,0.8585647583007813
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,24,24,128,1,float16,float16,3,0.7375919818878174
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,24,24,128,1,float16,fp8,3,0.8619824409484863
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,24,24,128,1,float16,float16,7,0.7486879825592041
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,24,24,128,1,float16,float16,15,0.7514624118804931
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,24,24,128,1,float16,fp8,7,0.8671648025512695
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,24,24,128,1,float16,float16,31,0.7521967887878418
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,24,24,128,1,float16,fp8,15,0.9398048400878907
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,24,24,128,1,float16,fp8,31,0.9252256393432617
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,24,24,128,1,float16,float16,63,0.7593039989471435
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,24,24,128,1,float16,fp8,63,0.9258015632629395
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,24,24,128,1,float16,float16,127,0.7957903861999511
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,24,24,128,1,float16,fp8,127,0.9886816024780274
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,24,24,128,1,float16,float16,1,1.4531744003295899
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,24,24,128,1,float16,float16,3,1.4682767868041993
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,24,24,128,1,float16,fp8,1,1.833176040649414
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,24,24,128,1,float16,float16,7,1.4659775733947753
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,24,24,128,1,float16,fp8,3,1.8157232284545899
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,24,24,128,1,float16,float16,1,0.028246399760246278
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,24,24,128,1,float16,fp8,7,1.8276079177856446
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,24,24,128,1,float16,float16,15,1.4630000114440918
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,24,24,128,1,float16,fp8,1,0.030476799607276915
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,24,24,128,1,float16,float16,3,0.028593599796295166
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,24,24,128,1,float16,float16,31,1.4513504028320312
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,24,24,128,1,float16,fp8,15,1.825204849243164
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,24,24,128,1,float16,fp8,3,0.03018240034580231
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,24,24,128,1,float16,float16,7,0.02821120023727417
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,24,24,128,1,float16,fp8,7,0.030527999997138976
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,24,24,128,1,float16,float16,63,1.4960080146789552
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,24,24,128,1,float16,fp8,31,1.771651268005371
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,24,24,128,1,float16,float16,15,0.027795198559761047
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,24,24,128,1,float16,fp8,15,0.030564799904823303
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,24,24,128,1,float16,float16,31,0.02805759906768799
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,24,24,128,1,float16,fp8,31,0.02980639934539795
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,24,24,128,1,float16,float16,63,0.028516799211502075
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,24,24,128,1,float16,fp8,63,1.784756851196289
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,24,24,128,1,float16,fp8,63,0.030211201310157774
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,24,24,128,1,float16,float16,127,0.02839680016040802
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,24,24,128,1,float16,fp8,127,0.030164799094200133
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,24,24,128,1,float16,float16,255,0.04366079866886139
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,24,24,128,1,float16,fp8,255,0.03622879981994629
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,24,24,128,1,float16,float16,511,0.05907040238380432
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,24,24,128,1,float16,fp8,511,0.05364959836006165
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,24,24,128,1,float16,float16,1023,0.09490240216255189
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,24,24,128,1,float16,fp8,1023,0.07551360130310059
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,24,24,128,1,float16,float16,2047,0.15719679594039918
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,24,24,128,1,float16,fp8,2047,0.11121280193328857
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,24,24,128,1,float16,float16,4095,0.28471519947052004
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,16,128,1,float16,float16,1,0.046670401096343996
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,24,24,128,1,float16,fp8,4095,0.18634079694747924
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,16,128,1,float16,float16,15,0.04764159917831421
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,16,128,1,float16,fp8,1,0.05297600030899048
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,16,128,1,float16,float16,3,0.04686239957809448
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,16,128,1,float16,fp8,3,0.05305439829826355
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,16,128,1,float16,float16,7,0.04656479954719543
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,16,128,1,float16,fp8,7,0.05281280279159546
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,16,128,1,float16,fp8,15,0.05394560098648071
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,16,128,1,float16,float16,31,0.04774560034275055
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,16,128,1,float16,fp8,31,0.05397599935531616
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,16,128,1,float16,float16,63,0.049649599194526675
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,16,128,1,float16,fp8,63,0.0537056028842926
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,16,128,1,float16,float16,127,0.05743200182914734
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,16,128,1,float16,fp8,127,0.05627040266990661
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,16,128,1,float16,float16,255,0.0823535978794098
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,16,128,1,float16,fp8,255,0.07758560180664062
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,16,128,1,float16,float16,511,0.12212799787521363
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,16,128,1,float16,fp8,511,0.10728000402450562
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,16,128,1,float16,float16,1023,0.21198720932006837
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,16,128,1,float16,fp8,1023,0.15884159803390502
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,16,128,1,float16,float16,2047,0.37667040824890136
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,16,128,1,float16,fp8,2047,0.25272479057312014
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,16,128,1,float16,float16,1,0.019867199659347533
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,16,128,1,float16,fp8,1,0.02048480063676834
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,16,128,1,float16,float16,4095,0.7125199794769287
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,16,128,1,float16,fp8,4095,0.4523439884185791
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,16,128,1,float16,float16,3,0.01995680034160614
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,16,128,1,float16,fp8,3,0.020628799498081208
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,16,128,1,float16,float16,7,0.019832000136375427
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,16,128,1,float16,fp8,7,0.02089280039072037
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,16,128,1,float16,fp8,8191,0.8521023750305176
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,16,128,1,float16,float16,15,0.02027360051870346
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,16,128,1,float16,float16,8191,1.3964768409729005
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,16,128,1,float16,fp8,15,0.021006399393081666
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,16,128,1,float16,float16,31,0.019944000244140624
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,16,128,1,float16,fp8,31,0.020720000565052032
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,16,128,1,float16,float16,63,0.02012320011854172
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,16,128,1,float16,fp8,63,0.021129600703716278
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,16,128,1,float16,float16,127,0.020265600085258482
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,16,128,1,float16,fp8,127,0.021155199408531188
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,16,128,1,float16,float16,255,0.021982400119304656
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,16,128,1,float16,fp8,255,0.022968000173568724
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,16,128,1,float16,float16,511,0.024456000328063963
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,16,128,1,float16,fp8,2047,0.027084800601005554
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,16,128,1,float16,fp8,511,0.02659200131893158
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,16,128,1,float16,float16,1023,0.02476480007171631
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,16,128,1,float16,fp8,1023,0.026001599431037904
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,16,128,1,float16,fp8,8191,0.0398144006729126
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,16,128,1,float16,float16,1,0.020342400670051573
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,16,128,1,float16,float16,2047,0.02641119956970215
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,16,128,1,float16,float16,4095,0.03012000024318695
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,16,128,1,float16,fp8,4095,0.030353599786758424
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,16,128,1,float16,float16,8191,0.04863680005073547
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,16,128,1,float16,fp8,1,0.021712000668048858
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,16,128,1,float16,float16,3,0.02052319943904877
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,16,128,1,float16,fp8,3,0.021108800172805788
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,16,128,1,float16,float16,7,0.02022400051355362
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,16,128,1,float16,fp8,31,0.021508799493312837
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,16,128,1,float16,fp8,7,0.02131839990615845
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,16,128,1,float16,float16,15,0.020304000377655028
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,16,128,1,float16,fp8,15,0.0215488001704216
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,16,128,1,float16,float16,31,0.0205375999212265
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,16,128,1,float16,fp8,63,0.02114560008049011
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,16,128,1,float16,float16,63,0.02036159932613373
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,16,128,1,float16,float16,127,0.020444799959659577
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,16,128,1,float16,fp8,127,0.021252800524234772
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,16,128,1,float16,float16,255,0.022305600345134735
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,16,128,1,float16,float16,511,0.02542720139026642
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,16,128,1,float16,fp8,255,0.02332320064306259
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,16,128,1,float16,fp8,511,0.027476799488067628
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,16,128,1,float16,float16,1023,0.026616001129150392
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,16,128,1,float16,fp8,4095,0.03963040113449097
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,16,128,1,float16,fp8,1023,0.026631999015808105
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,16,128,1,float16,float16,2047,0.03044320046901703
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,16,128,1,float16,float16,4095,0.04862079918384552
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,16,128,1,float16,fp8,2047,0.03059679865837097
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,16,128,1,float16,float16,8191,0.0704096019268036
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,16,128,1,float16,float16,1,0.033580800890922545
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,16,128,1,float16,fp8,7,0.03628000020980835
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,16,128,1,float16,fp8,8191,0.05600320100784302
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,16,128,1,float16,fp8,1,0.03611519932746887
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,16,128,1,float16,float16,3,0.03282879889011383
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,16,128,1,float16,fp8,3,0.0364544004201889
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,16,128,1,float16,float16,7,0.03319360017776489
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,16,128,1,float16,float16,15,0.03302879929542542
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,16,128,1,float16,fp8,15,0.036550399661064145
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,16,128,1,float16,float16,31,0.033430400490760806
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,16,128,1,float16,fp8,31,0.03584319949150085
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,16,128,1,float16,float16,63,0.03330720067024231
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,16,128,1,float16,fp8,63,0.036111998558044436
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,16,128,1,float16,float16,127,0.03618400096893311
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,16,128,1,float16,fp8,127,0.036208000779151914
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,16,128,1,float16,float16,255,0.052420800924301146
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,16,128,1,float16,fp8,255,0.04800960123538971
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,16,128,1,float16,float16,511,0.07442079782485962
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,16,128,1,float16,fp8,511,0.06581119894981384
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,16,128,1,float16,float16,1023,0.12027839422225953
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,16,128,1,float16,fp8,1023,0.09398880004882812
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,16,128,1,float16,float16,2047,0.2041088104248047
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,16,128,1,float16,fp8,2047,0.1417695999145508
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,16,128,1,float16,float16,4095,0.37109599113464353
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,16,128,1,float16,fp8,4095,0.2445375919342041
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,16,128,1,float16,float16,1,0.020793600380420683
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,16,128,1,float16,fp8,1,0.02123039960861206
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,16,128,1,float16,float16,3,0.020644800364971162
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,16,128,1,float16,fp8,3,0.021615999937057494
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,16,128,1,float16,float16,8191,0.713595199584961
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,16,128,1,float16,fp8,8191,0.43867201805114747
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,16,128,1,float16,float16,7,0.020577600598335265
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,16,128,1,float16,fp8,7,0.021614399552345277
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,16,128,1,float16,float16,15,0.020707200467586517
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,16,128,1,float16,fp8,15,0.021355199813842773
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,16,128,1,float16,float16,31,0.0206496000289917
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,16,128,1,float16,fp8,31,0.021521599590778352
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,16,128,1,float16,float16,63,0.02088160067796707
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,16,128,1,float16,float16,511,0.025441598892211915
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,16,128,1,float16,fp8,63,0.021480000019073485
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,16,128,1,float16,float16,127,0.02062239944934845
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,16,128,1,float16,fp8,127,0.021478399634361267
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,16,128,1,float16,float16,255,0.022771200537681578
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,16,128,1,float16,fp8,255,0.0237184002995491
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,16,128,1,float16,fp8,511,0.027208000421524048
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,16,128,1,float16,float16,1023,0.02868480086326599
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,16,128,1,float16,fp8,1023,0.030320000648498536
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,16,128,1,float16,float16,2047,0.04901120066642761
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,16,128,1,float16,fp8,8191,0.08162400126457214
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,16,128,1,float16,float16,1,0.07831199765205384
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,16,128,1,float16,fp8,2047,0.03687199950218201
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,16,128,1,float16,fp8,1,0.09185760021209717
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,16,128,1,float16,float16,4095,0.0702239990234375
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,16,128,1,float16,fp8,4095,0.05677279829978943
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,16,128,1,float16,float16,8191,0.11354720592498779
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,16,128,1,float16,float16,3,0.07851200103759766
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,16,128,1,float16,fp8,3,0.09207839965820312
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,16,128,1,float16,float16,7,0.07832959890365601
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,16,128,1,float16,fp8,7,0.0918720006942749
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,16,128,1,float16,float16,15,0.07844480276107788
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,16,128,1,float16,fp8,15,0.09271519780158996
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,16,128,1,float16,float16,31,0.07985919713973999
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,16,128,1,float16,fp8,31,0.09268320202827454
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,16,128,1,float16,float16,63,0.08695520162582397
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,16,128,1,float16,fp8,63,0.09535040259361267
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,16,128,1,float16,float16,127,0.0906224012374878
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,16,128,1,float16,fp8,127,0.10624639987945557
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,16,128,1,float16,float16,255,0.13791680335998535
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,16,128,1,float16,fp8,255,0.13151999711990356
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,16,128,1,float16,float16,511,0.22261440753936768
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,16,128,1,float16,fp8,511,0.19130560159683227
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,16,128,1,float16,fp8,1023,0.29267520904541017
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,16,128,1,float16,float16,1023,0.381662392616272
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,16,128,1,float16,float16,1,0.13703839778900145
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,16,128,1,float16,fp8,1,0.16298880577087402
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,16,128,1,float16,float16,3,0.13640480041503905
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,16,128,1,float16,fp8,2047,0.4837647914886475
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,16,128,1,float16,float16,2047,0.7225823879241944
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,16,128,1,float16,fp8,3,0.1645248055458069
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,16,128,1,float16,float16,7,0.13584159612655639
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,16,128,1,float16,fp8,7,0.16416800022125244
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,16,128,1,float16,float16,15,0.13793599605560303
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,16,128,1,float16,fp8,15,0.16352640390396117
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,16,128,1,float16,float16,31,0.14479999542236327
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,16,128,1,float16,fp8,4095,0.888270378112793
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,16,128,1,float16,fp8,31,0.16305279731750488
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,16,128,1,float16,float16,63,0.1466096043586731
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,16,128,1,float16,float16,4095,1.404535961151123
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,16,128,1,float16,fp8,63,0.17872480154037476
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,16,128,1,float16,float16,127,0.15546720027923583
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,16,128,1,float16,fp8,127,0.18528159856796264
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,16,128,1,float16,float16,255,0.2508496046066284
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,16,128,1,float16,fp8,255,0.2332832098007202
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,16,128,1,float16,float16,511,0.4180463790893555
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,16,128,1,float16,fp8,511,0.3623264074325562
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,16,128,1,float16,float16,1,0.2555936098098755
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,16,128,1,float16,fp8,1,0.3074512004852295
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,16,128,1,float16,float16,3,0.2534127950668335
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,16,128,1,float16,fp8,3,0.30553441047668456
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,16,128,1,float16,float16,1023,0.7248976230621338
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,16,128,1,float16,fp8,1023,0.561352014541626
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,16,128,1,float16,float16,7,0.2536815881729126
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,16,128,1,float16,fp8,7,0.3043567895889282
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,16,128,1,float16,float16,15,0.26143999099731446
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,16,128,1,float16,fp8,15,0.3063839912414551
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,16,128,1,float16,float16,31,0.2707551956176758
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,16,128,1,float16,fp8,31,0.32857279777526854
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,16,128,1,float16,float16,63,0.2785696029663086
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,16,128,1,float16,float16,127,0.28882880210876466
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,16,128,1,float16,fp8,63,0.33692800998687744
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,16,128,1,float16,fp8,127,0.34748799800872804
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,16,128,1,float16,float16,255,0.4337296009063721
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,16,128,1,float16,float16,1,0.02112800031900406
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,16,128,1,float16,fp8,255,0.4412975788116455
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,16,128,1,float16,fp8,1,0.02178560048341751
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,16,128,1,float16,float16,3,0.02123199999332428
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,16,128,1,float16,fp8,3,0.021854400634765625
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,16,128,1,float16,float16,511,0.8069408416748047
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,16,128,1,float16,float16,7,0.02099999934434891
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,16,128,1,float16,fp8,7,0.02205120027065277
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,16,128,1,float16,float16,63,0.02107519954442978
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,16,128,1,float16,fp8,63,0.021775999665260316
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,16,128,1,float16,fp8,511,0.6716944217681885
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,16,128,1,float16,float16,15,0.02088160067796707
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,16,128,1,float16,fp8,15,0.021830399334430695
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,16,128,1,float16,float16,31,0.02122559994459152
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,16,128,1,float16,fp8,31,0.021942399442195892
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,16,128,1,float16,float16,127,0.02133760005235672
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,16,128,1,float16,fp8,127,0.0217631995677948
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,16,128,1,float16,float16,255,0.022859199345111846
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,16,128,1,float16,fp8,255,0.023932799696922302
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,16,128,1,float16,float16,511,0.027671998739242552
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,16,128,1,float16,fp8,511,0.027750399708747864
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,16,128,1,float16,float16,1023,0.048107200860977174
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,16,128,1,float16,fp8,1023,0.03562879860401154
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,16,128,1,float16,float16,8191,0.1996783971786499
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,16,128,1,float16,float16,2047,0.06960480213165283
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,16,128,1,float16,fp8,2047,0.05577279925346375
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,16,128,1,float16,float16,4095,0.11293120384216308
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,16,128,1,float16,fp8,4095,0.08075680136680603
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,16,128,1,float16,fp8,8191,0.1313472032546997
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,16,128,1,float16,float16,1,0.48432002067565916
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,16,128,1,float16,fp8,1,0.5819263935089112
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,16,128,1,float16,float16,3,0.4844912052154541
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,16,128,1,float16,fp8,3,0.5903744220733642
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,16,128,1,float16,float16,7,0.49445600509643556
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,16,128,1,float16,fp8,7,0.5862287998199462
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,16,128,1,float16,float16,15,0.5103104114532471
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,16,128,1,float16,fp8,15,0.6032959938049316
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,16,128,1,float16,float16,31,0.5129663944244385
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,16,128,1,float16,fp8,31,0.6312880039215087
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,16,128,1,float16,float16,63,0.5174736022949219
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,16,128,1,float16,fp8,63,0.6275472164154052
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,16,128,1,float16,float16,127,0.5502223968505859
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,16,128,1,float16,fp8,127,0.6587264060974121
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,16,128,1,float16,float16,255,0.8070976257324218
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,16,128,1,float16,float16,1,0.9843279838562011
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,16,128,1,float16,fp8,255,0.8450223922729492
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,16,128,1,float16,float16,3,0.9850079536437988
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,16,128,1,float16,fp8,1,1.1410160064697266
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,16,128,1,float16,float16,7,0.986143970489502
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,16,128,1,float16,fp8,3,1.15098876953125
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,16,128,1,float16,float16,15,0.9794159889221191
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,16,128,1,float16,fp8,7,1.1789888381958007
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,16,128,1,float16,float16,31,0.9918383598327637
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,16,128,1,float16,fp8,15,1.2154272079467774
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,16,128,1,float16,fp8,31,1.2139439582824707
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,16,128,1,float16,float16,1,0.02459519952535629
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,16,128,1,float16,fp8,1,0.026017600297927858
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,16,128,1,float16,float16,63,1.001910400390625
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,16,128,1,float16,float16,3,0.024449600279331206
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,16,128,1,float16,fp8,3,0.026076799631118773
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,16,128,1,float16,fp8,63,1.2237024307250977
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,16,128,1,float16,float16,7,0.024512000381946564
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,16,128,1,float16,float16,127,1.0537376403808594
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,16,128,1,float16,fp8,7,0.02611039876937866
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,16,128,1,float16,float16,15,0.024587200582027437
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,16,128,1,float16,fp8,15,0.025911998748779298
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,16,128,1,float16,fp8,127,1.2585007667541503
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,16,128,1,float16,float16,31,0.024376000463962554
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,16,128,1,float16,fp8,31,0.02597759962081909
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,16,128,1,float16,float16,255,0.030028799176216127
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,16,128,1,float16,float16,63,0.02421119958162308
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,16,128,1,float16,fp8,63,0.02592960000038147
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,16,128,1,float16,float16,127,0.024716800451278685
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,16,128,1,float16,fp8,127,0.025969600677490233
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,16,128,1,float16,fp8,1023,0.05947200059890747
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,16,128,1,float16,fp8,255,0.029820799827575684
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,16,128,1,float16,float16,511,0.04729759991168976
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,16,128,1,float16,fp8,511,0.03856320083141327
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,16,128,1,float16,float16,1023,0.07258399724960327
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,16,128,1,float16,float16,2047,0.11548160314559937
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,16,128,1,float16,fp8,2047,0.08289759755134582
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,16,128,1,float16,fp8,4095,0.1333583950996399
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,16,128,1,float16,float16,4095,0.20012478828430175
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,12,12,128,1,float16,float16,1,0.0402895987033844
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,16,128,1,float16,float16,8191,0.37362399101257326
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,12,12,128,1,float16,fp8,1,0.04423680007457733
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,16,128,1,float16,fp8,8191,0.2326064109802246
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,12,12,128,1,float16,float16,3,0.040043199062347413
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,12,12,128,1,float16,fp8,3,0.044947201013565065
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,12,12,128,1,float16,float16,7,0.04019359946250915
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,12,12,128,1,float16,fp8,7,0.04552319943904877
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,12,12,128,1,float16,float16,15,0.0404448002576828
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,12,12,128,1,float16,float16,31,0.03987999856472015
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,12,12,128,1,float16,fp8,15,0.04488799870014191
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,12,12,128,1,float16,fp8,31,0.044689598679542544
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,12,12,128,1,float16,float16,63,0.040166398882865904
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,12,12,128,1,float16,fp8,63,0.04502080082893371
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,12,12,128,1,float16,fp8,511,0.0868511974811554
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,12,12,128,1,float16,float16,127,0.049399998784065244
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,12,12,128,1,float16,fp8,127,0.04623839855194092
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,12,12,128,1,float16,float16,255,0.06882879734039307
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,12,12,128,1,float16,fp8,255,0.06458079814910889
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,12,12,128,1,float16,float16,511,0.09854080080986023
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,12,12,128,1,float16,float16,1023,0.16609599590301513
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,12,12,128,1,float16,fp8,1023,0.12676479816436767
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,12,12,128,1,float16,float16,2047,0.2907808065414429
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,12,12,128,1,float16,fp8,2047,0.19876159429550172
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,12,12,128,1,float16,float16,1,0.01992959976196289
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,12,12,128,1,float16,float16,4095,0.5432608127593994
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,12,12,128,1,float16,fp8,4095,0.3482480049133301
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,12,12,128,1,float16,fp8,1,0.020468799769878386
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,12,12,128,1,float16,float16,3,0.01971839964389801
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,12,12,128,1,float16,fp8,3,0.02088160067796707
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,12,12,128,1,float16,float16,7,0.01987839937210083
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,12,12,128,1,float16,fp8,7,0.020931200683116914
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,12,12,128,1,float16,fp8,8191,0.6485007762908935
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,12,12,128,1,float16,float16,15,0.01987999975681305
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,12,12,128,1,float16,float16,8191,1.0515119552612304
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,12,12,128,1,float16,fp8,15,0.020528000593185425
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,12,12,128,1,float16,float16,31,0.01966399997472763
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,12,12,128,1,float16,fp8,31,0.021022400259971617
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,12,12,128,1,float16,float16,63,0.01992480009794235
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,12,12,128,1,float16,fp8,63,0.021030400693416596
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,12,12,128,1,float16,float16,127,0.020043200254440306
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,12,12,128,1,float16,fp8,127,0.0208064004778862
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,12,12,128,1,float16,fp8,255,0.022609600424766542
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,12,12,128,1,float16,float16,255,0.021347199380397797
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,12,12,128,1,float16,float16,511,0.024540799856185912
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,12,12,128,1,float16,fp8,511,0.026984000205993654
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,12,12,128,1,float16,float16,1023,0.025203201174736022
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,12,12,128,1,float16,fp8,1023,0.026519998908042908
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,12,12,128,1,float16,float16,2047,0.02560960054397583
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,12,12,128,1,float16,fp8,2047,0.026980799436569215
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,12,12,128,1,float16,float16,4095,0.028808000683784484
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,12,12,128,1,float16,fp8,4095,0.02882240116596222
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,12,12,128,1,float16,float16,8191,0.04305759966373444
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,12,12,128,1,float16,fp8,8191,0.035041600465774536
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,12,12,128,1,float16,float16,1,0.02040639966726303
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,12,12,128,1,float16,fp8,1,0.021166400611400606
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,12,12,128,1,float16,float16,3,0.02035519927740097
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,12,12,128,1,float16,fp8,3,0.021265600621700288
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,12,12,128,1,float16,float16,7,0.02020000070333481
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,12,12,128,1,float16,fp8,7,0.021080000698566435
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,12,12,128,1,float16,float16,15,0.020654399693012238
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,12,12,128,1,float16,fp8,15,0.02131839990615845
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,12,12,128,1,float16,float16,31,0.020390400290489198
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,12,12,128,1,float16,fp8,31,0.021140800416469575
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,12,12,128,1,float16,float16,63,0.020652799308300017
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,12,12,128,1,float16,fp8,63,0.021195200085639954
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,12,12,128,1,float16,float16,127,0.02038719952106476
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,12,12,128,1,float16,fp8,127,0.021425600349903106
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,12,12,128,1,float16,float16,255,0.021955199539661407
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,12,12,128,1,float16,fp8,255,0.023175999522209167
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,12,12,128,1,float16,float16,511,0.024967999756336214
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,12,12,128,1,float16,fp8,511,0.026708799600601196
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,12,12,128,1,float16,float16,1023,0.02598080039024353
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,12,12,128,1,float16,fp8,1023,0.027518400549888612
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,12,12,128,1,float16,float16,2047,0.029211199283599852
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,12,12,128,1,float16,fp8,2047,0.029364800453186034
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,12,12,128,1,float16,float16,4095,0.042638400197029115
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,12,12,128,1,float16,fp8,4095,0.035438400506973264
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,12,12,128,1,float16,float16,8191,0.060164797306060794
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,12,12,128,1,float16,fp8,8191,0.05212000012397766
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,12,12,128,1,float16,float16,1,0.029271999001502992
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,12,12,128,1,float16,fp8,1,0.0314736008644104
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,12,12,128,1,float16,float16,3,0.029174399375915528
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,12,12,128,1,float16,fp8,3,0.03130399882793426
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,12,12,128,1,float16,float16,7,0.029286399483680725
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,12,12,128,1,float16,fp8,7,0.03144159913063049
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,12,12,128,1,float16,float16,15,0.0290336012840271
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,12,12,128,1,float16,fp8,15,0.03129599988460541
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,12,12,128,1,float16,float16,31,0.029083201289176942
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,12,12,128,1,float16,fp8,31,0.0314767986536026
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,12,12,128,1,float16,float16,63,0.02924000024795532
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,12,12,128,1,float16,fp8,63,0.03136320114135742
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,12,12,128,1,float16,float16,127,0.02959200143814087
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,12,12,128,1,float16,fp8,127,0.03143840134143829
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,12,12,128,1,float16,float16,255,0.04466080069541931
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,12,12,128,1,float16,fp8,255,0.03747040033340454
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,12,12,128,1,float16,float16,511,0.06068159937858582
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,12,12,128,1,float16,fp8,511,0.05518239736557007
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,12,12,128,1,float16,float16,1023,0.09662560224533082
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,12,12,128,1,float16,fp8,1023,0.07807360291481018
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,12,12,128,1,float16,float16,2047,0.15971839427947998
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,12,12,128,1,float16,fp8,2047,0.11384799480438232
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,12,12,128,1,float16,float16,4095,0.28615360260009765
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,12,12,128,1,float16,fp8,4095,0.19092639684677123
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,12,12,128,1,float16,float16,1,0.020444799959659577
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,12,12,128,1,float16,fp8,1,0.021568000316619873
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,12,12,128,1,float16,float16,3,0.020683200657367708
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,12,12,128,1,float16,float16,8191,0.5423183917999268
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,12,12,128,1,float16,fp8,8191,0.3380784034729004
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,12,12,128,1,float16,fp8,3,0.021316799521446227
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,12,12,128,1,float16,float16,7,0.020561599731445314
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,12,12,128,1,float16,fp8,7,0.021371200680732727
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,12,12,128,1,float16,float16,15,0.02062239944934845
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,12,12,128,1,float16,fp8,15,0.021265600621700288
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,12,12,128,1,float16,float16,31,0.020587199926376344
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,12,12,128,1,float16,fp8,31,0.021358400583267212
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,12,12,128,1,float16,float16,63,0.02036159932613373
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,12,12,128,1,float16,fp8,63,0.022100800275802614
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,12,12,128,1,float16,float16,127,0.020436799526214598
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,12,12,128,1,float16,fp8,127,0.02126079946756363
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,12,12,128,1,float16,float16,255,0.022443200647830962
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,12,12,128,1,float16,fp8,255,0.023419199883937834
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,12,12,128,1,float16,float16,511,0.025257599353790284
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,12,12,128,1,float16,fp8,511,0.02731359899044037
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,12,12,128,1,float16,float16,1023,0.0293071985244751
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,12,12,128,1,float16,fp8,1023,0.03398880064487457
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,12,12,128,1,float16,float16,2047,0.0512175977230072
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,12,12,128,1,float16,fp8,2047,0.04168640077114105
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,12,12,128,1,float16,float16,4095,0.0736847996711731
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,12,12,128,1,float16,fp8,4095,0.06846399903297425
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,12,12,128,1,float16,float16,8191,0.12039999961853028
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,12,12,128,1,float16,fp8,8191,0.10223040580749512
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,12,12,128,1,float16,float16,1,0.06408159732818604
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,12,12,128,1,float16,fp8,1,0.07429119944572449
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,12,12,128,1,float16,float16,3,0.06399679780006409
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,12,12,128,1,float16,fp8,3,0.07400320172309875
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,12,12,128,1,float16,float16,7,0.06419519782066345
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,12,12,128,1,float16,fp8,7,0.07367519736289978
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,12,12,128,1,float16,float16,15,0.06359999775886535
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,12,12,128,1,float16,fp8,15,0.073852801322937
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,12,12,128,1,float16,fp8,127,0.08548799753189087
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,12,12,128,1,float16,float16,31,0.06406239867210388
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,12,12,128,1,float16,fp8,31,0.07432320117950439
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,12,12,128,1,float16,float16,63,0.07107359766960145
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,12,12,128,1,float16,fp8,63,0.07432479858398437
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,12,12,128,1,float16,fp8,511,0.14988800287246704
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,12,12,128,1,float16,float16,127,0.07870240211486816
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,12,12,128,1,float16,float16,255,0.11366879940032959
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,12,12,128,1,float16,fp8,255,0.1059872031211853
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,12,12,128,1,float16,float16,511,0.17196799516677858
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,12,12,128,1,float16,float16,1023,0.29999840259552
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,12,12,128,1,float16,fp8,1023,0.22805919647216796
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,12,12,128,1,float16,float16,1,0.10877439975738526
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,12,12,128,1,float16,fp8,2047,0.3675551891326904
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,12,12,128,1,float16,float16,2047,0.5502831935882568
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,12,12,128,1,float16,fp8,1,0.12989439964294433
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,12,12,128,1,float16,float16,3,0.10853760242462158
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,12,12,128,1,float16,fp8,3,0.12992000579833984
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,12,12,128,1,float16,fp8,7,0.12887200117111205
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,12,12,128,1,float16,float16,7,0.10796159505844116
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,12,12,128,1,float16,fp8,4095,0.6739520072937012
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,12,12,128,1,float16,float16,15,0.10872479677200317
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,12,12,128,1,float16,float16,4095,1.0569135665893554
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,12,12,128,1,float16,fp8,15,0.12992639541625978
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,12,12,128,1,float16,float16,31,0.11037919521331788
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,12,12,128,1,float16,fp8,31,0.12887200117111205
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,12,12,128,1,float16,float16,63,0.11885279417037964
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,12,12,128,1,float16,fp8,63,0.14132479429244996
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,12,12,128,1,float16,float16,127,0.1242751955986023
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,12,12,128,1,float16,fp8,127,0.15362240076065065
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,12,12,128,1,float16,float16,255,0.204367995262146
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,12,12,128,1,float16,fp8,255,0.18435679674148558
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,12,12,128,1,float16,float16,511,0.31988799571990967
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,12,12,128,1,float16,fp8,511,0.2741408109664917
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,12,12,128,1,float16,fp8,1023,0.4278656005859375
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,12,12,128,1,float16,float16,1,0.1964128017425537
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,12,12,128,1,float16,float16,1023,0.5585631847381591
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,12,12,128,1,float16,fp8,1,0.23683040142059325
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,12,12,128,1,float16,float16,15,0.19762400388717652
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,12,12,128,1,float16,float16,3,0.19856159687042235
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,12,12,128,1,float16,fp8,3,0.2362368106842041
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,12,12,128,1,float16,float16,7,0.1963055968284607
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,12,12,128,1,float16,fp8,7,0.2365648031234741
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,12,12,128,1,float16,float16,31,0.21032800674438476
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,12,12,128,1,float16,fp8,63,0.252459192276001
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,12,12,128,1,float16,fp8,15,0.24119360446929933
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,12,12,128,1,float16,fp8,31,0.2401263952255249
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,12,12,128,1,float16,float16,63,0.2150399923324585
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,12,12,128,1,float16,float16,127,0.22778079509735108
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,12,12,128,1,float16,fp8,127,0.27010560035705566
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,12,12,128,1,float16,float16,255,0.38220479488372805
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,12,12,128,1,float16,fp8,255,0.3373248100280762
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,12,12,128,1,float16,float16,3,0.020465600490570068
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,12,12,128,1,float16,float16,1,0.020729599893093108
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,12,12,128,1,float16,fp8,1,0.021542400121688843
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,12,12,128,1,float16,float16,511,0.6186031818389892
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,12,12,128,1,float16,fp8,3,0.021540799736976625
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,12,12,128,1,float16,fp8,511,0.5112319946289062
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,12,12,128,1,float16,fp8,7,0.021550400555133818
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,12,12,128,1,float16,float16,7,0.02064639925956726
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,12,12,128,1,float16,float16,15,0.0207056000828743
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,12,12,128,1,float16,fp8,15,0.02187040001153946
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,12,12,128,1,float16,float16,31,0.020604799687862396
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,12,12,128,1,float16,fp8,31,0.021878400444984437
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,12,12,128,1,float16,float16,63,0.02078080028295517
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,12,12,128,1,float16,fp8,63,0.021537600457668303
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,12,12,128,1,float16,fp8,511,0.027372801303863527
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,12,12,128,1,float16,float16,127,0.020875200629234314
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,12,12,128,1,float16,fp8,127,0.02152319997549057
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,12,12,128,1,float16,float16,255,0.02255840003490448
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,12,12,128,1,float16,fp8,255,0.02396959960460663
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,12,12,128,1,float16,float16,511,0.025705599784851076
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,12,12,128,1,float16,float16,1023,0.04564160108566284
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,12,12,128,1,float16,fp8,1023,0.0397599995136261
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,12,12,128,1,float16,float16,2047,0.06321920156478882
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,12,12,128,1,float16,fp8,2047,0.05995519757270813
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,12,12,128,1,float16,float16,4095,0.09827520251274109
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,12,12,128,1,float16,fp8,4095,0.08587999939918518
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,12,12,128,1,float16,float16,8191,0.16839679479598998
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,12,12,128,1,float16,fp8,8191,0.1355631947517395
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,12,12,128,1,float16,float16,1,0.3764832019805908
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,12,12,128,1,float16,fp8,1,0.4437727928161621
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,12,12,128,1,float16,float16,3,0.3703200101852417
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,12,12,128,1,float16,fp8,3,0.44587039947509766
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,12,12,128,1,float16,float16,7,0.3718559980392456
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,12,12,128,1,float16,fp8,7,0.4477215766906738
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,12,12,128,1,float16,float16,15,0.39303839206695557
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,12,12,128,1,float16,fp8,15,0.45063037872314454
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,12,12,128,1,float16,float16,31,0.3990000009536743
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,12,12,128,1,float16,fp8,63,0.48319358825683595
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,12,12,128,1,float16,fp8,31,0.47779359817504885
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,12,12,128,1,float16,float16,63,0.39693760871887207
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,12,12,128,1,float16,float16,127,0.4234799861907959
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,12,12,128,1,float16,fp8,127,0.5122191905975342
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,12,12,128,1,float16,float16,255,0.7053167819976807
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,12,12,128,1,float16,float16,1,0.7469007968902588
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,12,12,128,1,float16,fp8,255,0.6421343803405761
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,12,12,128,1,float16,float16,3,0.737281608581543
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,12,12,128,1,float16,fp8,1,0.8593695640563965
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,12,12,128,1,float16,fp8,3,0.8621312141418457
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,12,12,128,1,float16,float16,7,0.7467711925506592
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,12,12,128,1,float16,fp8,7,0.8704112052917481
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,12,12,128,1,float16,float16,15,0.7521967887878418
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,12,12,128,1,float16,fp8,15,0.9209360122680664
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,12,12,128,1,float16,float16,31,0.7546800136566162
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,12,12,128,1,float16,fp8,31,0.9205840110778809
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,12,12,128,1,float16,float16,1,0.024465599656105043
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,12,12,128,1,float16,float16,63,0.762556791305542
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,12,12,128,1,float16,fp8,1,0.02576799988746643
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,12,12,128,1,float16,fp8,63,0.9275088310241699
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,12,12,128,1,float16,float16,127,0.8084063529968262
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,12,12,128,1,float16,float16,7,0.02435680031776428
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,12,12,128,1,float16,fp8,3,0.02579520046710968
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,12,12,128,1,float16,float16,3,0.02447039932012558
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,12,12,128,1,float16,float16,15,0.024315199255943297
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,12,12,128,1,float16,fp8,7,0.02568320035934448
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,12,12,128,1,float16,fp8,127,0.9645008087158203
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,12,12,128,1,float16,fp8,15,0.025868800282478333
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,12,12,128,1,float16,float16,31,0.024324800074100494
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,12,12,128,1,float16,fp8,31,0.025915199518203737
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,12,12,128,1,float16,float16,63,0.02452320009469986
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,12,12,128,1,float16,fp8,63,0.02581920027732849
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,12,12,128,1,float16,float16,127,0.024476799368858337
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,12,12,128,1,float16,fp8,127,0.025856000185012818
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,12,12,128,1,float16,float16,255,0.027959999442100526
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,12,12,128,1,float16,fp8,255,0.029905599355697633
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,12,12,128,1,float16,float16,511,0.041812801361083986
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,12,12,128,1,float16,fp8,2047,0.08353599905967712
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,12,12,128,1,float16,fp8,511,0.037459200620651244
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,12,12,128,1,float16,float16,1023,0.061596798896789554
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,12,12,128,1,float16,fp8,1023,0.05934240221977234
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,12,12,128,1,float16,float16,2047,0.09585599899291992
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,12,12,128,1,float16,float16,4095,0.16490559577941893
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,12,12,128,1,float16,fp8,4095,0.1346735954284668
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,8,128,1,float16,float16,3,0.032707199454307556
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,8,128,1,float16,float16,1,0.03312639892101288
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,8,128,1,float16,fp8,1,0.03612639904022217
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,12,12,128,1,float16,fp8,8191,0.24048159122467042
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,12,12,128,1,float16,float16,8191,0.30146079063415526
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,8,128,1,float16,fp8,3,0.036259201169013974
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,8,128,1,float16,float16,7,0.03284800052642822
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,8,128,1,float16,fp8,7,0.035876798629760745
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,8,128,1,float16,float16,15,0.03307999968528748
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,8,128,1,float16,fp8,15,0.036078399419784545
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,8,128,1,float16,float16,31,0.033283200860023496
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,8,128,1,float16,fp8,31,0.03601279854774475
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,8,128,1,float16,float16,63,0.033452799916267394
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,8,128,1,float16,fp8,63,0.036292800307273866
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,8,128,1,float16,float16,127,0.03543039858341217
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,8,128,1,float16,fp8,127,0.03656319975852966
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,8,128,1,float16,float16,255,0.05327039957046509
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,8,128,1,float16,fp8,255,0.045542401075363156
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,8,128,1,float16,float16,511,0.07425280213356018
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,8,128,1,float16,fp8,511,0.066184002161026
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,8,128,1,float16,float16,1023,0.12070399522781372
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,8,128,1,float16,fp8,1023,0.09447360038757324
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,8,128,1,float16,float16,2047,0.20703840255737305
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,8,128,1,float16,fp8,2047,0.14331200122833251
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,8,128,1,float16,fp8,4095,0.2447376012802124
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,8,128,1,float16,float16,4095,0.3740272045135498
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,8,128,1,float16,float16,1,0.020000000298023225
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,8,128,1,float16,fp8,1,0.02080159932374954
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,8,128,1,float16,float16,8191,0.7153679847717285
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,8,128,1,float16,float16,3,0.02011680006980896
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,8,128,1,float16,fp8,8191,0.44191679954528806
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,8,128,1,float16,fp8,3,0.020822399854660036
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,8,128,1,float16,float16,7,0.01992959976196289
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,8,128,1,float16,fp8,7,0.020875200629234314
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,8,128,1,float16,float16,15,0.019766399264335634
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,8,128,1,float16,fp8,15,0.02104319930076599
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,8,128,1,float16,fp8,16383,0.831937599182129
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,8,128,1,float16,float16,31,0.019990399479866028
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,8,128,1,float16,fp8,31,0.02107200026512146
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,8,128,1,float16,float16,16383,1.4024864196777345
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,8,128,1,float16,float16,63,0.020001600682735442
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,8,128,1,float16,fp8,63,0.020814399421215057
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,8,128,1,float16,float16,127,0.01990399956703186
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,8,128,1,float16,fp8,127,0.020905600488185884
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,8,128,1,float16,float16,255,0.021931199729442595
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,8,128,1,float16,fp8,255,0.022777600586414336
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,8,128,1,float16,float16,511,0.02483839988708496
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,8,128,1,float16,float16,1023,0.024358400702476503
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,8,128,1,float16,fp8,511,0.02699680030345917
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,8,128,1,float16,fp8,1023,0.02560960054397583
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,8,128,1,float16,float16,2047,0.02483839988708496
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,8,128,1,float16,fp8,2047,0.02606239914894104
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,8,128,1,float16,float16,4095,0.026998400688171387
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,8,128,1,float16,fp8,4095,0.02739039957523346
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,8,128,1,float16,float16,8191,0.031108799576759338
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,8,128,1,float16,fp8,8191,0.031121599674224853
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,8,128,1,float16,float16,16383,0.04832960069179535
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,8,128,1,float16,fp8,16383,0.03732640147209167
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,8,128,1,float16,float16,1,0.020207999646663664
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,8,128,1,float16,fp8,1,0.021147200465202333
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,8,128,1,float16,float16,3,0.020284800231456755
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,8,128,1,float16,fp8,3,0.021052800118923187
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,8,128,1,float16,float16,7,0.020448000729084016
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,8,128,1,float16,fp8,7,0.02096160054206848
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,8,128,1,float16,float16,15,0.020259200036525725
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,8,128,1,float16,fp8,15,0.021001599729061127
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,8,128,1,float16,float16,31,0.020315200090408325
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,8,128,1,float16,fp8,31,0.021031999588012697
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,8,128,1,float16,float16,63,0.020417599380016326
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,8,128,1,float16,fp8,63,0.021038399636745454
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,8,128,1,float16,float16,127,0.02029920071363449
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,8,128,1,float16,fp8,127,0.02086720019578934
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,8,128,1,float16,float16,255,0.0216511994600296
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,8,128,1,float16,fp8,255,0.023022399842739107
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,8,128,1,float16,float16,511,0.024899199604988098
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,8,128,1,float16,fp8,511,0.026633599400520326
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,8,128,1,float16,float16,1023,0.024977600574493407
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,8,128,1,float16,fp8,1023,0.025964799523353576
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,8,128,1,float16,float16,2047,0.02669120132923126
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,8,128,1,float16,fp8,2047,0.027000001072883605
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,8,128,1,float16,float16,4095,0.0302592009305954
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,8,128,1,float16,fp8,4095,0.030268800258636475
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,8,128,1,float16,float16,8191,0.04843519926071167
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,8,128,1,float16,fp8,8191,0.038201600313186646
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,8,128,1,float16,float16,16383,0.07016159892082215
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,8,128,1,float16,fp8,16383,0.05647839903831482
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,8,128,1,float16,float16,1,0.025367999076843263
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,8,128,1,float16,fp8,1,0.02677600085735321
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,8,128,1,float16,float16,3,0.025313600897789
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,8,128,1,float16,fp8,3,0.026953598856925963
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,8,128,1,float16,float16,7,0.02555679976940155
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,8,128,1,float16,fp8,7,0.026819199323654175
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,8,128,1,float16,float16,15,0.025177600979804992
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,8,128,1,float16,fp8,15,0.026881599426269533
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,8,128,1,float16,float16,31,0.025300800800323486
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,8,128,1,float16,fp8,31,0.026851201057434083
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,8,128,1,float16,float16,63,0.025624001026153566
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,8,128,1,float16,fp8,63,0.02714560031890869
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,8,128,1,float16,float16,127,0.02571519911289215
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,8,128,1,float16,fp8,127,0.0268528014421463
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,8,128,1,float16,float16,255,0.03099679946899414
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,8,128,1,float16,fp8,255,0.03111039996147156
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,8,128,1,float16,float16,511,0.04805760085582733
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,8,128,1,float16,fp8,511,0.041596800088882446
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,8,128,1,float16,float16,1023,0.07511839866638184
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,8,128,1,float16,fp8,1023,0.06087200045585632
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,8,128,1,float16,float16,2047,0.11689120531082153
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,8,128,1,float16,fp8,2047,0.08496800065040588
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,8,128,1,float16,float16,4095,0.2001391887664795
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,8,128,1,float16,fp8,4095,0.13559360504150392
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,8,128,1,float16,float16,8191,0.3732959985733032
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,8,128,1,float16,float16,1,0.020351999998092653
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,8,128,1,float16,fp8,8191,0.23716480731964112
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,8,128,1,float16,fp8,3,0.021129600703716278
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,8,128,1,float16,fp8,1,0.021388800442218782
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,8,128,1,float16,float16,3,0.02045599967241287
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,8,128,1,float16,float16,7,0.02038239985704422
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,8,128,1,float16,fp8,7,0.021167999505996703
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,8,128,1,float16,fp8,16383,0.42842879295349123
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,8,128,1,float16,float16,16383,0.7174431800842285
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,8,128,1,float16,fp8,15,0.02099200040102005
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,8,128,1,float16,float16,15,0.020372800529003143
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,8,128,1,float16,float16,31,0.020304000377655028
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,8,128,1,float16,fp8,31,0.021425600349903106
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,8,128,1,float16,float16,63,0.0205375999212265
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,8,128,1,float16,fp8,63,0.02112800031900406
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,8,128,1,float16,float16,127,0.02062560021877289
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,8,128,1,float16,fp8,127,0.02158239930868149
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,8,128,1,float16,float16,255,0.022228799760341644
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,8,128,1,float16,fp8,255,0.023083199560642243
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,8,128,1,float16,float16,511,0.025241601467132568
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,8,128,1,float16,fp8,511,0.027136000990867614
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,8,128,1,float16,float16,1023,0.026199999451637267
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,8,128,1,float16,fp8,1023,0.026732799410820008
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,8,128,1,float16,float16,2047,0.02839039862155914
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,8,128,1,float16,fp8,2047,0.03043360114097595
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,8,128,1,float16,float16,4095,0.04819679856300354
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,8,128,1,float16,fp8,4095,0.03950079977512359
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,8,128,1,float16,float16,8191,0.06988959908485412
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,8,128,1,float16,fp8,8191,0.05591679811477661
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,8,128,1,float16,float16,16383,0.11319839954376221
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,8,128,1,float16,fp8,16383,0.08137440085411071
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,8,128,1,float16,float16,1,0.04927679896354675
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,8,128,1,float16,fp8,1,0.055315202474594115
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,8,128,1,float16,float16,3,0.04919840097427368
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,8,128,1,float16,fp8,3,0.055657601356506346
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,8,128,1,float16,float16,7,0.04891999959945679
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,8,128,1,float16,fp8,7,0.055934399366378784
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,8,128,1,float16,float16,15,0.04933120012283325
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,8,128,1,float16,fp8,15,0.05559200048446655
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,8,128,1,float16,float16,31,0.049060800671577455
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,8,128,1,float16,fp8,31,0.05541279911994934
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,8,128,1,float16,float16,63,0.05161920189857483
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,8,128,1,float16,fp8,63,0.0557856023311615
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,8,128,1,float16,float16,127,0.05853279829025269
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,8,128,1,float16,fp8,127,0.058254402875900266
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,8,128,1,float16,float16,255,0.08363360166549683
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,8,128,1,float16,fp8,255,0.07922239899635315
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,8,128,1,float16,float16,511,0.12408479452133178
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,8,128,1,float16,fp8,511,0.10907360315322875
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,8,128,1,float16,float16,1023,0.21435840129852296
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,8,128,1,float16,fp8,1023,0.16198079586029052
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,8,128,1,float16,float16,2047,0.3831183910369873
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,8,128,1,float16,fp8,2047,0.25672640800476076
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,8,128,1,float16,float16,1,0.08015999794006348
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,8,128,1,float16,float16,4095,0.7194128036499023
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,8,128,1,float16,fp8,4095,0.45937438011169435
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,8,128,1,float16,fp8,1,0.09386399984359742
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,8,128,1,float16,float16,3,0.07987359762191773
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,8,128,1,float16,fp8,3,0.09387040138244629
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,8,128,1,float16,float16,7,0.08002079725265503
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,8,128,1,float16,fp8,7,0.09411200284957885
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,8,128,1,float16,float16,15,0.08076800107955932
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,8,128,1,float16,fp8,15,0.09387999773025513
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,8,128,1,float16,fp8,8191,0.8567600250244141
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,8,128,1,float16,float16,31,0.07993119955062866
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,8,128,1,float16,float16,8191,1.398635196685791
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,8,128,1,float16,fp8,31,0.09413120150566101
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,8,128,1,float16,float16,63,0.08837440013885497
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,8,128,1,float16,fp8,63,0.09878399968147278
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,8,128,1,float16,float16,127,0.09790239930152893
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,8,128,1,float16,fp8,127,0.10801759958267212
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,8,128,1,float16,float16,255,0.14627840518951415
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,8,128,1,float16,fp8,255,0.13785439729690552
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,8,128,1,float16,float16,511,0.2256239891052246
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,8,128,1,float16,fp8,511,0.1932976007461548
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,8,128,1,float16,float16,1023,0.38765759468078614
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,8,128,1,float16,fp8,1023,0.29632000923156737
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,8,128,1,float16,float16,1,0.1393440008163452
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,8,128,1,float16,float16,2047,0.7280767917633056
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,8,128,1,float16,fp8,1,0.16862720251083374
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,8,128,1,float16,float16,3,0.14046560525894164
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,8,128,1,float16,fp8,3,0.16877280473709105
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,8,128,1,float16,fp8,2047,0.4904816150665283
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,8,128,1,float16,float16,7,0.1405344009399414
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,8,128,1,float16,fp8,7,0.16690080165863036
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,8,128,1,float16,float16,15,0.14078400135040284
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,8,128,1,float16,fp8,15,0.16825759410858154
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,8,128,1,float16,float16,31,0.14896320104598998
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,8,128,1,float16,fp8,31,0.16907360553741455
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,8,128,1,float16,float16,63,0.15797280073165892
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,8,128,1,float16,fp8,63,0.18117120265960693
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,8,128,1,float16,float16,127,0.16202239990234374
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,8,128,1,float16,fp8,127,0.18976160287857055
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,8,128,1,float16,float16,255,0.24674561023712158
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,8,128,1,float16,fp8,255,0.23973441123962402
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,8,128,1,float16,float16,511,0.4274752140045166
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,8,128,1,float16,fp8,511,0.3631295919418335
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,8,128,1,float16,float16,1,0.02083519995212555
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,8,128,1,float16,fp8,1,0.02162559926509857
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,8,128,1,float16,float16,3,0.02078399956226349
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,8,128,1,float16,fp8,3,0.021580800414085388
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,8,128,1,float16,float16,1023,0.7381343841552734
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,8,128,1,float16,float16,7,0.02094399929046631
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,8,128,1,float16,fp8,1023,0.5621103763580322
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,8,128,1,float16,fp8,31,0.021488000452518464
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,8,128,1,float16,fp8,7,0.021356800198554994
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,8,128,1,float16,float16,15,0.020712000131607056
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,8,128,1,float16,fp8,15,0.021542400121688843
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,8,128,1,float16,float16,31,0.020580799877643587
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,8,128,1,float16,float16,63,0.020798400044441223
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,8,128,1,float16,float16,127,0.020579199492931365
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,8,128,1,float16,fp8,63,0.021345600485801697
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,8,128,1,float16,fp8,127,0.02152000069618225
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,8,128,1,float16,float16,255,0.02234400063753128
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,8,128,1,float16,fp8,255,0.023358400166034698
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,8,128,1,float16,float16,511,0.02542240023612976
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,8,128,1,float16,fp8,511,0.02767840027809143
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,8,128,1,float16,float16,1023,0.028863999247550964
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,8,128,1,float16,fp8,1023,0.029790401458740234
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,8,128,1,float16,float16,2047,0.04893920123577118
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,8,128,1,float16,fp8,2047,0.04015519917011261
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,8,128,1,float16,float16,4095,0.0703440010547638
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,8,128,1,float16,fp8,4095,0.05688639879226685
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,8,128,1,float16,float16,8191,0.11396160125732421
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,8,128,1,float16,fp8,1,0.309334397315979
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,8,128,1,float16,float16,3,0.25769920349121095
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,8,128,1,float16,fp8,8191,0.08140479922294616
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,8,128,1,float16,float16,16383,0.1999392032623291
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,8,128,1,float16,fp8,16383,0.13124639987945558
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,8,128,1,float16,float16,1,0.256712007522583
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,8,128,1,float16,fp8,3,0.30933120250701907
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,8,128,1,float16,float16,7,0.2560863971710205
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,8,128,1,float16,fp8,7,0.3097935914993286
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,8,128,1,float16,float16,15,0.2628511905670166
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,8,128,1,float16,fp8,15,0.31416161060333253
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,8,128,1,float16,float16,127,0.29390881061553953
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,8,128,1,float16,float16,31,0.27432799339294434
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,8,128,1,float16,fp8,31,0.33453280925750734
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,8,128,1,float16,float16,63,0.2775808095932007
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,8,128,1,float16,fp8,63,0.3355920076370239
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,8,128,1,float16,fp8,127,0.3476032018661499
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,8,128,1,float16,float16,255,0.4368783950805664
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,8,128,1,float16,fp8,255,0.4517519950866699
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,8,128,1,float16,float16,1,0.48597121238708496
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,8,128,1,float16,fp8,1,0.5822239875793457
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,8,128,1,float16,float16,3,0.4906464099884033
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,8,128,1,float16,float16,511,0.8297007560729981
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,8,128,1,float16,fp8,511,0.6777056217193603
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,8,128,1,float16,fp8,3,0.5819071769714356
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,8,128,1,float16,float16,7,0.49390082359313964
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,8,128,1,float16,float16,15,0.5104127883911133
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,8,128,1,float16,fp8,7,0.5940800189971924
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,8,128,1,float16,fp8,15,0.6039103984832763
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,8,128,1,float16,float16,31,0.5138480186462402
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,8,128,1,float16,fp8,63,0.6305535793304443
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,8,128,1,float16,fp8,31,0.6282112121582031
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,8,128,1,float16,float16,63,0.5192255973815918
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,8,128,1,float16,float16,127,0.549283218383789
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,8,128,1,float16,float16,1,0.020921599864959717
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,8,128,1,float16,fp8,1,0.02171359956264496
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,8,128,1,float16,fp8,7,0.021619200706481934
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,8,128,1,float16,fp8,127,0.6476384162902832
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,8,128,1,float16,float16,15,0.020582400262355804
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,8,128,1,float16,fp8,255,0.8438879966735839
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,8,128,1,float16,float16,3,0.021060800552368163
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,8,128,1,float16,fp8,3,0.02168159931898117
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,8,128,1,float16,float16,7,0.020878399908542632
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,8,128,1,float16,float16,255,0.8190560340881348
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,8,128,1,float16,fp8,15,0.021580800414085388
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,8,128,1,float16,float16,31,0.020878399908542632
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,8,128,1,float16,fp8,31,0.02139520049095154
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,8,128,1,float16,float16,63,0.020851199328899384
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,8,128,1,float16,fp8,63,0.02160799950361252
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,8,128,1,float16,float16,127,0.0210207998752594
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,8,128,1,float16,fp8,127,0.02202560007572174
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,8,128,1,float16,float16,255,0.02269279956817627
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,8,128,1,float16,fp8,255,0.02367199957370758
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,8,128,1,float16,float16,511,0.027609598636627198
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,8,128,1,float16,fp8,511,0.027452799677848815
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,8,128,1,float16,float16,1023,0.04796639978885651
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,8,128,1,float16,fp8,1023,0.0350735992193222
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,8,128,1,float16,float16,2047,0.06934880018234253
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,8,128,1,float16,fp8,2047,0.056380802392959596
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,8,128,1,float16,float16,4095,0.11291840076446533
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,8,128,1,float16,fp8,4095,0.08066400289535522
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,8,128,1,float16,float16,8191,0.1982319951057434
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,8,128,1,float16,fp8,8191,0.13231680393218995
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,4,128,1,float16,float16,1,0.025916799902915955
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,8,128,1,float16,float16,16383,0.3711711883544922
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,8,128,1,float16,fp8,16383,0.23056960105895996
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,4,128,1,float16,fp8,1,0.02701599895954132
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,4,128,1,float16,float16,3,0.025854399800300597
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,4,128,1,float16,fp8,7,0.02764959931373596
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,4,128,1,float16,fp8,3,0.02707839906215668
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,4,128,1,float16,float16,7,0.026081600785255434
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,4,128,1,float16,float16,15,0.02610880136489868
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,4,128,1,float16,fp8,15,0.027348798513412476
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,4,128,1,float16,float16,31,0.025969600677490233
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,4,128,1,float16,fp8,31,0.02741599977016449
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,4,128,1,float16,float16,63,0.025931200385093688
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,4,128,1,float16,fp8,63,0.027110400795936584
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,4,128,1,float16,float16,511,0.04854240119457245
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,4,128,1,float16,fp8,127,0.027779200673103334
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,4,128,1,float16,float16,127,0.02609120011329651
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,4,128,1,float16,float16,255,0.029734399914741517
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,4,128,1,float16,fp8,255,0.03166880011558533
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,4,128,1,float16,fp8,511,0.04266560077667236
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,4,128,1,float16,float16,1023,0.07467520236968994
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,4,128,1,float16,fp8,1023,0.061692798137664796
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,4,128,1,float16,float16,2047,0.11752159595489502
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,4,128,1,float16,fp8,2047,0.08699359893798828
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,4,128,1,float16,float16,4095,0.20258400440216065
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,4,128,1,float16,fp8,4095,0.13558720350265502
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,4,128,1,float16,float16,8191,0.3727823972702026
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,4,128,1,float16,fp8,8191,0.23615360260009766
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,4,128,1,float16,float16,1,0.01988479942083359
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,4,128,1,float16,fp8,16383,0.4342207908630371
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,4,128,1,float16,float16,16383,0.7164591789245606
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,4,128,1,float16,fp8,1,0.020895999670028687
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,4,128,1,float16,float16,3,0.02003840059041977
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,4,128,1,float16,fp8,3,0.02050720006227493
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,4,128,1,float16,float16,7,0.019852800667285918
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,4,128,1,float16,fp8,7,0.020768000185489653
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,4,128,1,float16,float16,15,0.019676800072193145
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,4,128,1,float16,fp8,15,0.02093279957771301
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,4,128,1,float16,fp8,32767,0.8330479621887207
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,4,128,1,float16,float16,32767,1.408017635345459
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,4,128,1,float16,float16,31,0.019836799800395967
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,4,128,1,float16,fp8,31,0.020715199410915375
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,4,128,1,float16,float16,63,0.019811199605464937
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,4,128,1,float16,fp8,63,0.020822399854660036
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,4,128,1,float16,float16,127,0.019551999866962433
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,4,128,1,float16,fp8,127,0.020873600244522096
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,4,128,1,float16,float16,255,0.021825599670410156
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,4,128,1,float16,fp8,255,0.022675199806690215
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,4,128,1,float16,float16,511,0.024665600061416625
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,4,128,1,float16,fp8,511,0.02656480073928833
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,4,128,1,float16,float16,1023,0.025260800123214723
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,4,128,1,float16,fp8,1023,0.02650400102138519
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,4,128,1,float16,float16,2047,0.025361600518226623
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,4,128,1,float16,fp8,2047,0.026759999990463256
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,4,128,1,float16,float16,4095,0.025681599974632263
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,4,128,1,float16,fp8,4095,0.02688480019569397
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,4,128,1,float16,float16,8191,0.02754240036010742
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,4,128,1,float16,fp8,8191,0.02805280089378357
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,4,128,1,float16,float16,16383,0.03285120129585266
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,4,128,1,float16,fp8,16383,0.031836798787117
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,4,128,1,float16,float16,32767,0.04965279996395111
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,4,128,1,float16,fp8,32767,0.04252960085868836
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,4,128,1,float16,float16,1,0.02040960043668747
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,4,128,1,float16,fp8,1,0.02104640007019043
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,4,128,1,float16,float16,31,0.020428800582885744
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,4,128,1,float16,float16,3,0.020025600492954255
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,4,128,1,float16,fp8,3,0.020955200493335723
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,4,128,1,float16,float16,7,0.020006400346755982
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,4,128,1,float16,fp8,7,0.02083359956741333
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,4,128,1,float16,float16,15,0.020043200254440306
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,4,128,1,float16,fp8,15,0.02109760046005249
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,4,128,1,float16,fp8,31,0.021063999831676485
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,4,128,1,float16,float16,63,0.02043039947748184
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,4,128,1,float16,fp8,63,0.02114560008049011
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,4,128,1,float16,float16,127,0.020214399695396422
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,4,128,1,float16,fp8,127,0.020875200629234314
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,4,128,1,float16,float16,255,0.021748800575733186
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,4,128,1,float16,fp8,255,0.02284640073776245
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,4,128,1,float16,float16,511,0.02481600046157837
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,4,128,1,float16,fp8,511,0.026840001344680786
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,4,128,1,float16,float16,1023,0.025526401400566102
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,4,128,1,float16,fp8,1023,0.0271263986825943
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,4,128,1,float16,float16,2047,0.025916799902915955
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,4,128,1,float16,fp8,2047,0.02696160078048706
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,4,128,1,float16,float16,4095,0.02720640003681183
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,4,128,1,float16,fp8,4095,0.02773280143737793
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,4,128,1,float16,float16,8191,0.030895999073982237
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,4,128,1,float16,fp8,8191,0.031785601377487184
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,4,128,1,float16,float16,16383,0.0489520013332367
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,4,128,1,float16,fp8,16383,0.04106079936027527
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,4,128,1,float16,float16,32767,0.07264800071716308
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,4,128,1,float16,fp8,32767,0.058841598033905027
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,4,128,1,float16,float16,1,0.02131039947271347
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,4,128,1,float16,fp8,1,0.022625599801540375
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,4,128,1,float16,float16,3,0.02159679979085922
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,4,128,1,float16,fp8,3,0.022291199862957002
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,4,128,1,float16,float16,7,0.02143840044736862
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,4,128,1,float16,fp8,7,0.02240640074014664
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,4,128,1,float16,float16,15,0.02144960016012192
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,4,128,1,float16,fp8,15,0.0221328005194664
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,4,128,1,float16,float16,31,0.021568000316619873
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,4,128,1,float16,fp8,31,0.022380800545215608
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,4,128,1,float16,float16,63,0.02136320024728775
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,4,128,1,float16,fp8,63,0.022441600263118745
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,4,128,1,float16,float16,127,0.021465599536895752
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,4,128,1,float16,fp8,127,0.022467200458049775
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,4,128,1,float16,float16,255,0.023201599717140198
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,4,128,1,float16,fp8,255,0.0243136003613472
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,4,128,1,float16,float16,511,0.027911999821662904
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,4,128,1,float16,fp8,511,0.028182399272918702
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,4,128,1,float16,float16,1023,0.04845919907093048
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,4,128,1,float16,fp8,1023,0.03608480095863342
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,4,128,1,float16,float16,2047,0.0706287980079651
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,4,128,1,float16,fp8,2047,0.056176000833511354
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,4,128,1,float16,float16,4095,0.11454399824142455
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,4,128,1,float16,fp8,4095,0.08145279884338379
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,4,128,1,float16,float16,8191,0.19993760585784912
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,4,128,1,float16,fp8,8191,0.1315376043319702
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,4,128,1,float16,float16,16383,0.3732111930847168
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,4,128,1,float16,float16,3,0.020260800421237946
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,4,128,1,float16,fp8,16383,0.23275198936462402
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,4,128,1,float16,float16,1,0.020175999402999877
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,4,128,1,float16,fp8,1,0.021110400557518005
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,4,128,1,float16,fp8,3,0.021134400367736818
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,4,128,1,float16,float16,32767,0.718716812133789
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,4,128,1,float16,fp8,32767,0.42769761085510255
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,4,128,1,float16,fp8,7,0.021089600026607515
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,4,128,1,float16,float16,63,0.01977919936180115
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,4,128,1,float16,fp8,63,0.02120479941368103
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,4,128,1,float16,float16,127,0.020478400588035583
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,4,128,1,float16,float16,7,0.02022880017757416
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,4,128,1,float16,float16,15,0.020319999754428865
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,4,128,1,float16,fp8,15,0.021147200465202333
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,4,128,1,float16,float16,31,0.020263999700546265
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,4,128,1,float16,fp8,31,0.021007999777793884
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,4,128,1,float16,fp8,127,0.02098720073699951
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,4,128,1,float16,float16,255,0.021991999447345735
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,4,128,1,float16,fp8,255,0.023369599878787995
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,4,128,1,float16,float16,511,0.025067201256752013
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,4,128,1,float16,fp8,511,0.026704001426696777
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,4,128,1,float16,float16,1023,0.025838398933410646
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,4,128,1,float16,fp8,1023,0.02697120010852814
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,4,128,1,float16,float16,2047,0.02739199995994568
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,4,128,1,float16,float16,16383,0.07189120054244995
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,4,128,1,float16,fp8,2047,0.0274399995803833
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,4,128,1,float16,float16,4095,0.03130080103874207
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,4,128,1,float16,fp8,4095,0.031543999910354614
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,4,128,1,float16,float16,8191,0.04954400062561035
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,4,128,1,float16,fp8,8191,0.03793599903583526
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,4,128,1,float16,fp8,16383,0.057067197561264035
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,4,128,1,float16,float16,32767,0.11526559591293335
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,4,128,1,float16,fp8,32767,0.08166559934616088
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,4,128,1,float16,float16,1,0.034771201014518735
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,4,128,1,float16,fp8,1,0.03812800049781799
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,4,128,1,float16,float16,3,0.03528479933738708
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,4,128,1,float16,fp8,3,0.038171198964118955
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,4,128,1,float16,float16,7,0.0348688006401062
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,4,128,1,float16,fp8,7,0.03821440041065216
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,4,128,1,float16,float16,15,0.03474079966545105
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,4,128,1,float16,fp8,15,0.03794719874858856
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,4,128,1,float16,float16,31,0.03504000008106232
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,4,128,1,float16,fp8,31,0.03813759982585907
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,4,128,1,float16,float16,63,0.03529120087623596
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,4,128,1,float16,float16,511,0.07597280144691468
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,4,128,1,float16,fp8,63,0.03829759955406189
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,4,128,1,float16,float16,127,0.03642399907112122
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,4,128,1,float16,float16,1023,0.12261279821395873
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,4,128,1,float16,fp8,127,0.038571199774742125
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,4,128,1,float16,float16,255,0.0558896005153656
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,4,128,1,float16,fp8,255,0.04766559898853302
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,4,128,1,float16,fp8,511,0.06792799830436706
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,4,128,1,float16,fp8,1023,0.0959007978439331
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,4,128,1,float16,fp8,4095,0.24520161151885986
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,4,128,1,float16,float16,2047,0.20848000049591064
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,4,128,1,float16,fp8,2047,0.14488639831542968
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,4,128,1,float16,float16,4095,0.3765408039093018
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,4,128,1,float16,float16,1,0.05263839960098267
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,4,128,1,float16,fp8,1,0.058432000875473025
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,4,128,1,float16,float16,8191,0.7165679931640625
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,4,128,1,float16,fp8,8191,0.4449135780334473
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,4,128,1,float16,float16,3,0.05232959985733032
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,4,128,1,float16,float16,7,0.05280159711837769
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,4,128,1,float16,fp8,3,0.05853279829025269
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,4,128,1,float16,fp8,7,0.058376002311706546
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,4,128,1,float16,float16,15,0.052297598123550414
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,4,128,1,float16,fp8,15,0.0585968017578125
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,4,128,1,float16,fp8,16383,0.8355551719665527
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,4,128,1,float16,float16,31,0.05235999822616577
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,4,128,1,float16,float16,16383,1.4003727912902832
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,4,128,1,float16,fp8,31,0.05846719741821289
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,4,128,1,float16,float16,63,0.053566402196884154
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,4,128,1,float16,fp8,63,0.05822719931602478
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,4,128,1,float16,float16,127,0.06164960265159607
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,4,128,1,float16,fp8,127,0.06220639944076538
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,4,128,1,float16,float16,255,0.08557440042495727
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,4,128,1,float16,fp8,255,0.08494880199432372
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,4,128,1,float16,float16,511,0.12847360372543334
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,4,128,1,float16,fp8,511,0.11096479892730712
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,4,128,1,float16,float16,1023,0.2137615919113159
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,4,128,1,float16,fp8,1023,0.16498559713363647
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,4,128,1,float16,float16,2047,0.3833872079849243
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,4,128,1,float16,fp8,2047,0.2607872009277344
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,4,128,1,float16,float16,1,0.08637440204620361
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,4,128,1,float16,fp8,1,0.09916800260543823
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,4,128,1,float16,float16,3,0.08738240003585815
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,4,128,1,float16,float16,4095,0.7220960140228272
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,4,128,1,float16,fp8,3,0.10049279928207397
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,4,128,1,float16,fp8,4095,0.4626304149627686
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,4,128,1,float16,float16,7,0.08757439851760865
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,4,128,1,float16,fp8,31,0.10085599422454834
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,4,128,1,float16,fp8,7,0.09943839907646179
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,4,128,1,float16,float16,15,0.08673279881477355
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,4,128,1,float16,fp8,15,0.10003999471664429
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,4,128,1,float16,float16,31,0.08629279732704162
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,4,128,1,float16,float16,63,0.09688000082969665
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,4,128,1,float16,fp8,63,0.10430400371551514
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,4,128,1,float16,float16,127,0.0978879988193512
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,4,128,1,float16,float16,511,0.2237071990966797
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,4,128,1,float16,fp8,127,0.1102992057800293
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,4,128,1,float16,float16,255,0.1437376022338867
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,4,128,1,float16,fp8,255,0.1388208031654358
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,4,128,1,float16,fp8,511,0.20014560222625732
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,4,128,1,float16,float16,1023,0.3910288095474243
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,4,128,1,float16,float16,1,0.02029760032892227
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,4,128,1,float16,fp8,1023,0.3045775890350342
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,4,128,1,float16,fp8,1,0.021352000534534454
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,4,128,1,float16,float16,3,0.020763200521469117
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,4,128,1,float16,fp8,7,0.021564799547195434
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,4,128,1,float16,fp8,3,0.021459199488162994
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,4,128,1,float16,float16,2047,0.7321824073791504
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,4,128,1,float16,float16,7,0.020419199764728547
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,4,128,1,float16,fp8,2047,0.4894144058227539
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,4,128,1,float16,float16,63,0.020612800121307374
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,4,128,1,float16,float16,15,0.020556800067424774
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,4,128,1,float16,fp8,15,0.02157119959592819
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,4,128,1,float16,float16,31,0.02062560021877289
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,4,128,1,float16,fp8,31,0.021643200516700746
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,4,128,1,float16,fp8,63,0.02125599980354309
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,4,128,1,float16,fp8,127,0.02136960029602051
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,4,128,1,float16,float16,127,0.020761600136756896
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,4,128,1,float16,float16,255,0.022492800652980805
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,4,128,1,float16,fp8,255,0.02339999973773956
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,4,128,1,float16,float16,511,0.025547200441360475
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,4,128,1,float16,fp8,511,0.0271263986825943
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,4,128,1,float16,fp8,4095,0.040582400560379026
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,4,128,1,float16,float16,1023,0.027928000688552855
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,4,128,1,float16,fp8,1023,0.02764959931373596
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,4,128,1,float16,float16,2047,0.029736000299453735
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,4,128,1,float16,fp8,2047,0.03152480125427246
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,4,128,1,float16,float16,4095,0.049542400240898135
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,4,128,1,float16,float16,32767,0.20140480995178223
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,4,128,1,float16,float16,8191,0.0714959979057312
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,4,128,1,float16,fp8,8191,0.05723680257797241
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,4,128,1,float16,float16,16383,0.11597599983215331
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,4,128,1,float16,fp8,16383,0.0823087990283966
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,4,128,1,float16,fp8,32767,0.1335968017578125
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,4,128,1,float16,float16,1,0.1488752007484436
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,4,128,1,float16,fp8,1,0.17587200403213502
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,4,128,1,float16,float16,3,0.14853119850158691
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,4,128,1,float16,fp8,3,0.17604800462722778
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,4,128,1,float16,float16,7,0.15024319887161255
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,4,128,1,float16,fp8,7,0.17471840381622314
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,4,128,1,float16,float16,15,0.14941120147705078
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,4,128,1,float16,fp8,15,0.1745952010154724
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,4,128,1,float16,float16,31,0.15534240007400513
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,4,128,1,float16,fp8,31,0.17730400562286378
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,4,128,1,float16,float16,63,0.15787359476089477
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,4,128,1,float16,fp8,63,0.18875679969787598
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,4,128,1,float16,float16,127,0.16720319986343385
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,4,128,1,float16,fp8,127,0.191702401638031
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,4,128,1,float16,float16,255,0.24063360691070557
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,4,128,1,float16,fp8,255,0.24488799571990966
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,4,128,1,float16,float16,511,0.4152048110961914
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,4,128,1,float16,fp8,511,0.3666896104812622
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,4,128,1,float16,float16,1,0.2744240045547485
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,4,128,1,float16,fp8,1,0.3219583988189697
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,4,128,1,float16,float16,1023,0.7365647792816162
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,4,128,1,float16,float16,3,0.2739311933517456
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,4,128,1,float16,fp8,1023,0.5652688026428223
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,4,128,1,float16,fp8,3,0.3226560115814209
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,4,128,1,float16,float16,7,0.2726207971572876
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,4,128,1,float16,fp8,7,0.32246079444885256
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,4,128,1,float16,float16,15,0.27876479625701905
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,4,128,1,float16,fp8,15,0.322708797454834
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,4,128,1,float16,float16,31,0.2838736057281494
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,4,128,1,float16,fp8,31,0.34045920372009275
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,4,128,1,float16,float16,63,0.2882911920547485
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,4,128,1,float16,fp8,63,0.34523520469665525
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,4,128,1,float16,float16,127,0.2988672018051147
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,4,128,1,float16,fp8,127,0.3486191987991333
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,4,128,1,float16,float16,255,0.43468480110168456
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,4,128,1,float16,float16,1,0.020630399882793426
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,4,128,1,float16,fp8,1,0.021751999855041504
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,4,128,1,float16,fp8,255,0.4513519763946533
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,4,128,1,float16,float16,3,0.020582400262355804
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,4,128,1,float16,fp8,3,0.02154559940099716
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,4,128,1,float16,float16,7,0.020390400290489198
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,4,128,1,float16,float16,511,0.8024448394775391
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,4,128,1,float16,fp8,7,0.021652799844741822
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,4,128,1,float16,fp8,511,0.6899407863616943
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,4,128,1,float16,fp8,15,0.021476800739765167
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,4,128,1,float16,float16,15,0.02062080055475235
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,4,128,1,float16,float16,31,0.020763200521469117
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,4,128,1,float16,fp8,31,0.021411199867725373
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,4,128,1,float16,float16,63,0.021118399500846863
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,4,128,1,float16,fp8,63,0.021836799383163453
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,4,128,1,float16,float16,127,0.020603199303150178
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,4,128,1,float16,fp8,127,0.02144639939069748
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,4,128,1,float16,float16,255,0.02232320010662079
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,4,128,1,float16,fp8,255,0.023545600473880768
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,4,128,1,float16,float16,511,0.02524479925632477
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,4,128,1,float16,fp8,511,0.027475199103355406
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,4,128,1,float16,float16,1023,0.03175520002841949
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,4,128,1,float16,fp8,1023,0.031220799684524535
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,4,128,1,float16,float16,2047,0.04978559911251068
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,4,128,1,float16,fp8,2047,0.03745599985122681
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,4,128,1,float16,float16,4095,0.07208160161972046
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,4,128,1,float16,fp8,4095,0.05774400234222412
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,4,128,1,float16,float16,8191,0.11622560024261475
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,4,128,1,float16,fp8,8191,0.08305760025978089
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,4,128,1,float16,float16,16383,0.20204479694366456
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,4,128,1,float16,fp8,16383,0.13351680040359498
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,2,128,1,float16,float16,1,0.022015999257564544
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,4,128,1,float16,float16,32767,0.3747503995895386
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,2,128,1,float16,fp8,1,0.022835199534893037
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,4,128,1,float16,fp8,32767,0.233787202835083
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,2,128,1,float16,float16,3,0.022300800681114195
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,2,128,1,float16,fp8,3,0.0234607994556427
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,2,128,1,float16,fp8,7,0.02300799936056137
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,2,128,1,float16,float16,7,0.0225055992603302
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,2,128,1,float16,float16,15,0.022041599452495574
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,2,128,1,float16,fp8,15,0.023228800296783446
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,2,128,1,float16,fp8,127,0.022859199345111846
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,2,128,1,float16,float16,31,0.022100800275802614
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,2,128,1,float16,fp8,31,0.022942399978637694
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,2,128,1,float16,float16,63,0.022232000529766083
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,2,128,1,float16,fp8,63,0.02327360063791275
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,2,128,1,float16,float16,127,0.022524799406528472
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,2,128,1,float16,float16,255,0.023737600445747374
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,2,128,1,float16,fp8,255,0.02504960000514984
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,2,128,1,float16,float16,511,0.029039999842643736
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,2,128,1,float16,fp8,511,0.028679999709129333
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,2,128,1,float16,float16,1023,0.049481600522994995
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,2,128,1,float16,fp8,1023,0.04271200001239776
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,2,128,1,float16,float16,2047,0.07084320187568664
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,2,128,1,float16,fp8,2047,0.05703359842300415
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,2,128,1,float16,float16,4095,0.11482239961624145
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,2,128,1,float16,fp8,4095,0.08123999834060669
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,2,128,1,float16,float16,8191,0.20115039348602295
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,2,128,1,float16,fp8,8191,0.13525279760360717
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,2,128,1,float16,float16,16383,0.3755552053451538
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,2,128,1,float16,float16,1,0.018203200399875642
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,2,128,1,float16,fp8,16383,0.2352544069290161
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,2,128,1,float16,fp8,1,0.01916159987449646
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,2,128,1,float16,fp8,32767,0.43067522048950196
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,2,128,1,float16,float16,32767,0.7215136051177978
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,2,128,1,float16,fp8,3,0.0191103994846344
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,2,128,1,float16,float16,3,0.018059200048446654
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,2,128,1,float16,float16,7,0.01814880073070526
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,2,128,1,float16,fp8,7,0.019068799912929535
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,2,128,1,float16,float16,15,0.018377600610256194
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,2,128,1,float16,fp8,15,0.019246399402618408
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,2,128,1,float16,float16,31,0.018755200505256652
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,2,128,1,float16,fp8,31,0.019014400243759156
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,2,128,1,float16,float16,63,0.01835999935865402
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,2,128,1,float16,fp8,65535,0.8201824188232422
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,2,128,1,float16,fp8,63,0.019203199446201323
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,2,128,1,float16,float16,65535,1.4163552284240724
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,2,128,1,float16,float16,127,0.018241600692272188
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,2,128,1,float16,fp8,127,0.019203199446201323
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,2,128,1,float16,fp8,1023,0.025164800882339477
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,2,128,1,float16,float16,255,0.01974560022354126
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,2,128,1,float16,fp8,255,0.021134400367736818
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,2,128,1,float16,float16,511,0.02295359969139099
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,2,128,1,float16,fp8,511,0.02492319941520691
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,2,128,1,float16,float16,1023,0.0235167995095253
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,2,128,1,float16,float16,2047,0.023652799427509308
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,2,128,1,float16,fp8,2047,0.02526719868183136
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,2,128,1,float16,float16,4095,0.02383359968662262
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,2,128,1,float16,fp8,4095,0.0253248006105423
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,2,128,1,float16,float16,8191,0.02550399899482727
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,2,128,1,float16,fp8,8191,0.02757279872894287
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,2,128,1,float16,float16,16383,0.02720640003681183
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,2,128,1,float16,fp8,16383,0.02805440127849579
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,2,128,1,float16,float16,32767,0.031091201305389404
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,2,128,1,float16,fp8,32767,0.03183520138263703
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,2,128,1,float16,float16,65535,0.04892640113830567
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,2,128,1,float16,float16,1,0.020158399641513825
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,2,128,1,float16,fp8,65535,0.04270560145378113
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,2,128,1,float16,fp8,1,0.020628799498081208
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,2,128,1,float16,float16,3,0.02014559954404831
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,2,128,1,float16,fp8,3,0.02093600034713745
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,2,128,1,float16,float16,7,0.019838400185108185
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,2,128,1,float16,fp8,7,0.02078080028295517
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,2,128,1,float16,float16,15,0.020182399451732634
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,2,128,1,float16,fp8,15,0.021036800742149354
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,2,128,1,float16,float16,31,0.0200655996799469
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,2,128,1,float16,fp8,31,0.021108800172805788
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,2,128,1,float16,float16,63,0.02003680020570755
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,2,128,1,float16,fp8,63,0.020815999805927278
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,2,128,1,float16,float16,127,0.020001600682735442
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,2,128,1,float16,fp8,127,0.021073600649833678
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,2,128,1,float16,float16,255,0.02160159945487976
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,2,128,1,float16,fp8,255,0.02298240065574646
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,2,128,1,float16,float16,511,0.024643200635910033
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,2,128,1,float16,fp8,511,0.026716798543930054
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,2,128,1,float16,float16,1023,0.025696000456809996
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,2,128,1,float16,fp8,1023,0.02687999904155731
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,2,128,1,float16,float16,2047,0.02577120065689087
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,2,128,1,float16,fp8,2047,0.02725279927253723
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,2,128,1,float16,float16,4095,0.025894400477409363
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,2,128,1,float16,fp8,4095,0.027116799354553224
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,2,128,1,float16,float16,8191,0.029256001114845276
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,2,128,1,float16,fp8,8191,0.02969760000705719
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,2,128,1,float16,float16,16383,0.03497759997844696
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,2,128,1,float16,fp8,16383,0.03377920091152191
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,2,128,1,float16,float16,32767,0.051425600051879884
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,2,128,1,float16,fp8,32767,0.03903039991855621
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,2,128,1,float16,float16,65535,0.07468159794807434
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,2,128,1,float16,fp8,65535,0.05975840091705322
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,2,128,1,float16,float16,1,0.02104319930076599
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,2,128,1,float16,fp8,1,0.02208160012960434
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,2,128,1,float16,float16,3,0.02117920070886612
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,2,128,1,float16,fp8,3,0.021935999393463135
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,2,128,1,float16,float16,7,0.021054400503635405
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,2,128,1,float16,fp8,7,0.02192640006542206
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,2,128,1,float16,float16,15,0.021347199380397797
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,2,128,1,float16,fp8,15,0.02191520035266876
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,2,128,1,float16,float16,31,0.021063999831676485
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,2,128,1,float16,fp8,31,0.022049599885940553
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,2,128,1,float16,float16,63,0.02122559994459152
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,2,128,1,float16,fp8,63,0.02191520035266876
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,2,128,1,float16,float16,127,0.021104000508785248
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,2,128,1,float16,fp8,127,0.022329600155353548
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,2,128,1,float16,float16,255,0.023337599635124207
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,2,128,1,float16,fp8,255,0.023971199989318848
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,2,128,1,float16,fp8,2047,0.04343680143356323
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,2,128,1,float16,float16,511,0.02590399980545044
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,2,128,1,float16,fp8,511,0.027772799134254456
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,2,128,1,float16,float16,1023,0.030321601033210754
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,2,128,1,float16,fp8,1023,0.032092800736427306
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,2,128,1,float16,float16,2047,0.05095999836921692
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,2,128,1,float16,float16,4095,0.07289760112762451
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,2,128,1,float16,fp8,4095,0.058433598279953
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,2,128,1,float16,float16,8191,0.11908160448074341
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,2,128,1,float16,fp8,8191,0.08501440286636353
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,2,128,1,float16,float16,16383,0.20454881191253663
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,2,128,1,float16,fp8,16383,0.13737280368804933
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,2,128,1,float16,float16,32767,0.3790992021560669
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,2,128,1,float16,float16,3,0.02040800005197525
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,2,128,1,float16,fp8,3,0.02101760059595108
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,2,128,1,float16,fp8,32767,0.23934400081634521
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,2,128,1,float16,float16,1,0.020262399315834047
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,2,128,1,float16,fp8,1,0.020640000700950623
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,2,128,1,float16,fp8,65535,0.4320000171661377
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,2,128,1,float16,float16,65535,0.7250383853912353
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,2,128,1,float16,float16,7,0.020075200498104094
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,2,128,1,float16,fp8,7,0.021243199706077576
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,2,128,1,float16,float16,15,0.02003999948501587
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,2,128,1,float16,fp8,15,0.021227200329303742
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,2,128,1,float16,float16,31,0.02022880017757416
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,2,128,1,float16,fp8,31,0.02083519995212555
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,2,128,1,float16,float16,63,0.020168000459671022
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,2,128,1,float16,fp8,63,0.021166400611400606
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,2,128,1,float16,float16,127,0.020121599733829498
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,2,128,1,float16,fp8,127,0.021300800144672394
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,2,128,1,float16,float16,255,0.0217056006193161
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,2,128,1,float16,fp8,255,0.023038400709629057
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,2,128,1,float16,float16,511,0.02486719936132431
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,2,128,1,float16,fp8,511,0.026556798815727235
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,2,128,1,float16,float16,1023,0.0257423996925354
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,2,128,1,float16,fp8,1023,0.02694559991359711
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,2,128,1,float16,float16,2047,0.02589600086212158
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,2,128,1,float16,fp8,2047,0.027531200647354127
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,2,128,1,float16,float16,4095,0.027689599990844728
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,2,128,1,float16,fp8,4095,0.02776640057563782
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,2,128,1,float16,fp8,32767,0.06040480136871338
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,2,128,1,float16,float16,8191,0.03335680067539215
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,2,128,1,float16,fp8,8191,0.03356640040874481
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,2,128,1,float16,float16,16383,0.05124160051345825
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,2,128,1,float16,fp8,16383,0.038756799697875974
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,2,128,1,float16,float16,32767,0.07374560236930847
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,2,128,1,float16,float16,65535,0.11835039854049682
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,2,128,1,float16,float16,1,0.027537599205970764
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,2,128,1,float16,fp8,65535,0.0885312020778656
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,2,128,1,float16,fp8,1,0.029054400324821473
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,2,128,1,float16,float16,3,0.027385601401329042
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,2,128,1,float16,fp8,3,0.02874400019645691
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,2,128,1,float16,float16,7,0.027131199836730957
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,2,128,1,float16,fp8,7,0.029097598791122437
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,2,128,1,float16,float16,15,0.02773759961128235
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,2,128,1,float16,fp8,15,0.029080000519752503
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,2,128,1,float16,float16,31,0.027451199293136597
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,2,128,1,float16,fp8,31,0.028832000494003297
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,2,128,1,float16,float16,63,0.027291199564933775
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,2,128,1,float16,fp8,63,0.02892639935016632
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,2,128,1,float16,float16,127,0.027563199400901794
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,2,128,1,float16,fp8,127,0.02946079969406128
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,2,128,1,float16,float16,255,0.03331040143966675
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,2,128,1,float16,fp8,255,0.03308480083942413
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,2,128,1,float16,float16,511,0.050312000513076785
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,2,128,1,float16,fp8,511,0.043540799617767335
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,2,128,1,float16,float16,1023,0.07590879797935486
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,2,128,1,float16,fp8,1023,0.0633408010005951
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,2,128,1,float16,float16,2047,0.11853920221328736
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,2,128,1,float16,fp8,2047,0.08843839764595032
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,2,128,1,float16,float16,4095,0.20363519191741944
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,2,128,1,float16,fp8,4095,0.13823200464248658
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,2,128,1,float16,float16,8191,0.3770064115524292
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,2,128,1,float16,fp8,8191,0.2377471923828125
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,2,128,1,float16,float16,1,0.037555199861526486
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,2,128,1,float16,fp8,1,0.04096480011940003
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,2,128,1,float16,float16,16383,0.72084641456604
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,2,128,1,float16,float16,3,0.03755039870738983
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,2,128,1,float16,fp8,16383,0.43920321464538575
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,2,128,1,float16,fp8,3,0.04066239893436432
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,2,128,1,float16,float16,7,0.03752799928188324
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,2,128,1,float16,fp8,7,0.04089759886264801
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,2,128,1,float16,float16,15,0.03755680024623871
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,2,128,1,float16,fp8,32767,0.8292351722717285
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,2,128,1,float16,float16,31,0.037662398815155027
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,2,128,1,float16,fp8,31,0.04092479944229126
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,2,128,1,float16,float16,32767,1.4088640213012695
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,2,128,1,float16,fp8,15,0.04108160138130188
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,2,128,1,float16,float16,63,0.03771679997444153
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,2,128,1,float16,fp8,63,0.040740799903869626
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,2,128,1,float16,float16,127,0.040003201365470885
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,2,128,1,float16,fp8,127,0.040934398770332336
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,2,128,1,float16,float16,255,0.058627200126647946
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,2,128,1,float16,fp8,255,0.052455997467041014
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,2,128,1,float16,float16,511,0.07962239980697632
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,2,128,1,float16,fp8,511,0.07077919840812683
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,2,128,1,float16,float16,1023,0.12605600357055663
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,2,128,1,float16,fp8,1023,0.09884480237960816
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,2,128,1,float16,fp8,2047,0.14701600074768068
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,2,128,1,float16,float16,2047,0.20998880863189698
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,2,128,1,float16,fp8,4095,0.24801599979400635
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,2,128,1,float16,float16,4095,0.38506879806518557
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,2,128,1,float16,float16,1,0.05870400071144104
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,2,128,1,float16,fp8,1,0.06462879776954651
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,2,128,1,float16,float16,3,0.058569598197937014
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,2,128,1,float16,fp8,3,0.06482719779014587
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,2,128,1,float16,fp8,8191,0.44836959838867185
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,2,128,1,float16,float16,8191,0.7234943866729736
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,2,128,1,float16,float16,7,0.05832639932632446
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,2,128,1,float16,float16,15,0.05869600176811218
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,2,128,1,float16,fp8,7,0.06487839818000793
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,2,128,1,float16,float16,127,0.06960480213165283
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,2,128,1,float16,fp8,15,0.0647823989391327
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,2,128,1,float16,float16,31,0.05852159857749939
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,2,128,1,float16,fp8,31,0.06516159772872925
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,2,128,1,float16,float16,511,0.13523839712142943
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,2,128,1,float16,float16,63,0.06185600161552429
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,2,128,1,float16,fp8,511,0.11776959896087646
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,2,128,1,float16,fp8,63,0.0653984010219574
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,2,128,1,float16,fp8,127,0.06761279702186584
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,2,128,1,float16,float16,255,0.09348160028457642
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,2,128,1,float16,fp8,255,0.08765280246734619
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,2,128,1,float16,float16,2047,0.3891168117523193
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,2,128,1,float16,float16,1023,0.22250559329986572
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,2,128,1,float16,fp8,1023,0.17161279916763306
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,2,128,1,float16,float16,1,0.02027200013399124
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,2,128,1,float16,fp8,2047,0.26617600917816164
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,2,128,1,float16,fp8,1,0.021400000154972076
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,2,128,1,float16,float16,3,0.020252799987792967
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,2,128,1,float16,fp8,3,0.021083199977874757
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,2,128,1,float16,float16,4095,0.7365248203277588
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,2,128,1,float16,fp8,7,0.02125760018825531
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,2,128,1,float16,float16,7,0.020121599733829498
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,2,128,1,float16,fp8,4095,0.46750879287719727
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,2,128,1,float16,float16,15,0.020231999456882477
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,2,128,1,float16,fp8,15,0.021414400637149812
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,2,128,1,float16,float16,31,0.020465600490570068
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,2,128,1,float16,fp8,31,0.021246400475502015
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,2,128,1,float16,float16,63,0.02054080069065094
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,2,128,1,float16,fp8,63,0.02136159986257553
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,2,128,1,float16,float16,127,0.020156799256801604
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,2,128,1,float16,fp8,127,0.02139520049095154
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,2,128,1,float16,float16,255,0.021993599832057953
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,2,128,1,float16,fp8,255,0.02354239970445633
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,2,128,1,float16,float16,511,0.025080001354217528
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,2,128,1,float16,fp8,511,0.026926401257514953
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,2,128,1,float16,float16,1023,0.02590239942073822
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,2,128,1,float16,fp8,1023,0.02733759880065918
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,2,128,1,float16,float16,2047,0.027636799216270446
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,2,128,1,float16,fp8,2047,0.02807359993457794
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,2,128,1,float16,float16,4095,0.03312320113182068
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,2,128,1,float16,fp8,4095,0.031780800223350524
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,2,128,1,float16,float16,8191,0.051179200410842896
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,2,128,1,float16,fp8,8191,0.04366720020771027
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,2,128,1,float16,float16,16383,0.07323679924011231
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,2,128,1,float16,fp8,16383,0.058937597274780276
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,2,128,1,float16,float16,32767,0.11769599914550781
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,2,128,1,float16,fp8,32767,0.08536000251770019
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,2,128,1,float16,float16,3,0.0936896026134491
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,2,128,1,float16,float16,65535,0.2043855905532837
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,2,128,1,float16,float16,1,0.09331520199775696
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,2,128,1,float16,fp8,65535,0.1371008038520813
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,2,128,1,float16,fp8,1,0.10582400560379028
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,2,128,1,float16,fp8,3,0.10534240007400512
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,2,128,1,float16,float16,7,0.09472960233688354
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,2,128,1,float16,fp8,7,0.1055567979812622
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,2,128,1,float16,float16,15,0.09425439834594726
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,2,128,1,float16,fp8,15,0.10607680082321166
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,2,128,1,float16,float16,31,0.09541440010070801
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,2,128,1,float16,fp8,31,0.106113600730896
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,2,128,1,float16,float16,63,0.10147680044174194
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,2,128,1,float16,fp8,63,0.11074559688568116
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,2,128,1,float16,float16,127,0.10679359436035156
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,2,128,1,float16,fp8,127,0.11856160163879395
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,2,128,1,float16,float16,255,0.14943840503692626
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,2,128,1,float16,fp8,255,0.14539359807968139
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,2,128,1,float16,float16,511,0.2331984043121338
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,2,128,1,float16,fp8,511,0.20618560314178466
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,2,128,1,float16,float16,1023,0.39613120555877684
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,2,128,1,float16,fp8,1023,0.3054464101791382
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,2,128,1,float16,fp8,1,0.18305599689483643
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,2,128,1,float16,float16,1,0.15994720458984374
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,2,128,1,float16,fp8,3,0.18540639877319337
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,2,128,1,float16,float16,3,0.1592079997062683
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,2,128,1,float16,float16,2047,0.7335792064666748
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,2,128,1,float16,fp8,2047,0.49547200202941893
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,2,128,1,float16,float16,7,0.1595247983932495
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,2,128,1,float16,fp8,7,0.1834879994392395
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,2,128,1,float16,float16,15,0.1600607991218567
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,2,128,1,float16,fp8,15,0.18262879848480223
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,2,128,1,float16,float16,31,0.16506719589233398
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,2,128,1,float16,fp8,31,0.18322399854660035
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,2,128,1,float16,float16,63,0.17696640491485596
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,2,128,1,float16,fp8,63,0.19712640047073365
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,2,128,1,float16,float16,127,0.17848000526428223
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,2,128,1,float16,fp8,127,0.20170400142669678
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,2,128,1,float16,float16,255,0.24953598976135255
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,2,128,1,float16,fp8,255,0.25433759689331054
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,2,128,1,float16,float16,1,0.02043839991092682
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,2,128,1,float16,float16,511,0.4144495964050293
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,2,128,1,float16,fp8,1,0.02152799963951111
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,2,128,1,float16,fp8,511,0.3819551944732666
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,2,128,1,float16,float16,3,0.02041279971599579
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,2,128,1,float16,fp8,3,0.021342399716377258
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,2,128,1,float16,float16,7,0.020619200170040132
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,2,128,1,float16,float16,1023,0.7410831928253174
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,2,128,1,float16,fp8,1023,0.5778143882751465
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,2,128,1,float16,float16,15,0.020761600136756896
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,2,128,1,float16,fp8,7,0.021515199542045595
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,2,128,1,float16,float16,127,0.020763200521469117
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,2,128,1,float16,fp8,15,0.02140959948301315
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,2,128,1,float16,float16,31,0.020401600003242492
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,2,128,1,float16,fp8,31,0.02147520035505295
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,2,128,1,float16,float16,63,0.020451200008392335
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,2,128,1,float16,fp8,63,0.02109919935464859
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,2,128,1,float16,fp8,127,0.021542400121688843
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,2,128,1,float16,float16,255,0.022510400414466857
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,2,128,1,float16,fp8,255,0.02338559925556183
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,2,128,1,float16,float16,511,0.025276800990104674
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,2,128,1,float16,fp8,511,0.027331200242042542
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,2,128,1,float16,float16,1023,0.027700799703598022
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,2,128,1,float16,fp8,1023,0.028036800026893616
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,2,128,1,float16,float16,2047,0.029743999242782593
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,2,128,1,float16,fp8,2047,0.03180319964885712
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,2,128,1,float16,float16,4095,0.05019840002059937
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,2,128,1,float16,fp8,4095,0.040398401021957395
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,2,128,1,float16,float16,8191,0.07386400103569031
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,2,128,1,float16,fp8,8191,0.059703999757766725
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,2,128,1,float16,float16,16383,0.11822880506515503
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,2,128,1,float16,fp8,16383,0.08602399826049804
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,2,128,1,float16,float16,32767,0.20483839511871338
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,2,128,1,float16,fp8,32767,0.13686879873275756
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,1,1,128,1,float16,float16,1,0.021755200624465943
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,2,128,1,float16,float16,65535,0.3783168077468872
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,1,1,128,1,float16,fp8,1,0.02279680073261261
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,2,128,1,float16,fp8,65535,0.234987211227417
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,1,1,128,1,float16,float16,3,0.0219200000166893
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,1,1,128,1,float16,fp8,3,0.02253919988870621
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,1,1,128,1,float16,float16,7,0.021931199729442595
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,1,1,128,1,float16,float16,15,0.022252799570560457
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,1,1,128,1,float16,fp8,7,0.022963200509548188
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,1,1,128,1,float16,fp8,15,0.02261119931936264
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,1,1,128,1,float16,float16,31,0.022044800221920013
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,1,1,128,1,float16,float16,63,0.021756799519062044
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,1,1,128,1,float16,fp8,31,0.022460800409317017
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,1,1,128,1,float16,fp8,63,0.022635200619697572
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,1,1,128,1,float16,float16,127,0.021823999285697938
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,1,1,128,1,float16,fp8,127,0.022734400629997254
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,1,1,128,1,float16,float16,255,0.023622399568557738
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,1,1,128,1,float16,fp8,255,0.024799999594688416
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,1,1,128,1,float16,float16,511,0.02672159969806671
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,1,1,128,1,float16,fp8,511,0.028379198908805848
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,1,1,128,1,float16,float16,4095,0.07346879839897155
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,1,1,128,1,float16,float16,1023,0.030985599756240843
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,1,1,128,1,float16,fp8,1023,0.0326335996389389
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,1,1,128,1,float16,float16,2047,0.051841598749160764
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,1,1,128,1,float16,fp8,2047,0.0422111988067627
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,1,1,128,1,float16,float16,16383,0.2089087963104248
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,1,1,128,1,float16,fp8,4095,0.05869439840316772
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,1,1,128,1,float16,float16,8191,0.11910400390625
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,1,1,128,1,float16,fp8,8191,0.0869376003742218
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,1,1,128,1,float16,fp8,16383,0.1396399974822998
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,1,1,128,1,float16,float16,32767,0.38496320247650145
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,1,1,128,1,float16,fp8,32767,0.24261760711669922
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,1,1,128,1,float16,float16,1,0.01719360053539276
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,1,1,128,1,float16,fp8,1,0.018318399786949158
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,1,1,128,1,float16,float16,3,0.017548799514770508
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,1,1,128,1,float16,fp8,65535,0.43805441856384275
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,1,1,128,1,float16,fp8,3,0.018188799917697906
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,1,1,128,1,float16,float16,65535,0.730238389968872
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,1,1,128,1,float16,float16,7,0.017382399737834932
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,1,1,128,1,float16,fp8,7,0.018246400356292724
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,1,1,128,1,float16,float16,15,0.01735839992761612
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,1,1,128,1,float16,fp8,15,0.018193599581718446
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,1,1,128,1,float16,float16,31,0.01735839992761612
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,1,1,128,1,float16,fp8,31,0.01828320026397705
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,1,1,128,1,float16,fp8,131071,0.8407391548156739
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,1,1,128,1,float16,float16,63,0.017476800084114074
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,1,1,128,1,float16,fp8,63,0.01812800019979477
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,1,1,128,1,float16,fp8,255,0.02032800018787384
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,1,1,128,1,float16,float16,131071,1.4264287948608398
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,1,1,128,1,float16,float16,127,0.017092800140380858
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,1,1,128,1,float16,fp8,127,0.018308800458908082
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,1,1,128,1,float16,float16,255,0.01891999989748001
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,1,1,128,1,float16,float16,511,0.02192640006542206
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,1,1,128,1,float16,fp8,511,0.02415039986371994
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,1,1,128,1,float16,float16,1023,0.022355200350284578
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,1,1,128,1,float16,float16,2047,0.02284960001707077
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,1,1,128,1,float16,fp8,1023,0.024371199309825897
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,1,1,128,1,float16,fp8,2047,0.023870399594306944
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,1,1,128,1,float16,float16,4095,0.02282560020685196
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,1,1,128,1,float16,fp8,4095,0.02452960014343262
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,1,1,128,1,float16,float16,8191,0.024873599410057068
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,1,1,128,1,float16,fp8,8191,0.026102399826049803
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,1,1,128,1,float16,float16,16383,0.02821120023727417
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,1,1,128,1,float16,fp8,16383,0.029652801156044007
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,1,1,128,1,float16,float16,32767,0.029543998837471008
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,1,1,128,1,float16,fp8,32767,0.030611199140548707
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,1,1,128,1,float16,float16,65535,0.0342960000038147
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,1,1,128,1,float16,fp8,65535,0.034599998593330385
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,1,1,128,1,float16,float16,131071,0.05415840148925781
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,1,1,128,1,float16,fp8,131071,0.04207679927349091
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,1,1,128,1,float16,float16,1,0.018105599284172057
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,1,1,128,1,float16,fp8,1,0.019124799966812135
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,1,1,128,1,float16,float16,3,0.018441599607467652
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,1,1,128,1,float16,fp8,3,0.019206400215625762
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,1,1,128,1,float16,float16,7,0.018084800243377684
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,1,1,128,1,float16,fp8,7,0.019121600687503813
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,1,1,128,1,float16,float16,15,0.018355199694633485
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,1,1,128,1,float16,fp8,15,0.019262400269508363
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,1,1,128,1,float16,float16,31,0.018251200020313264
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,1,1,128,1,float16,fp8,31,0.01929599940776825
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,1,1,128,1,float16,float16,63,0.018464000523090364
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,1,1,128,1,float16,fp8,63,0.019270400702953338
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,1,1,128,1,float16,float16,127,0.018177600204944612
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,1,1,128,1,float16,fp8,127,0.019019199907779692
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,1,1,128,1,float16,float16,255,0.01987680047750473
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,1,1,128,1,float16,fp8,255,0.021214400231838227
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,1,1,128,1,float16,float16,511,0.022841599583625794
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,1,1,128,1,float16,fp8,511,0.025196799635887147
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,1,1,128,1,float16,float16,1023,0.023785600066185
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,1,1,128,1,float16,fp8,1023,0.025110399723052977
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,1,1,128,1,float16,float16,2047,0.023625600337982177
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,1,1,128,1,float16,fp8,2047,0.025337600708007814
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,1,1,128,1,float16,float16,4095,0.02396479994058609
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,1,1,128,1,float16,fp8,4095,0.025588798522949218
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,1,1,128,1,float16,float16,8191,0.02571200132369995
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,1,1,128,1,float16,fp8,8191,0.027470400929450987
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,1,1,128,1,float16,float16,16383,0.030926400423049928
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,1,1,128,1,float16,fp8,16383,0.030875200033187868
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,1,1,128,1,float16,float16,32767,0.03529280126094818
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,1,1,128,1,float16,fp8,32767,0.03503519892692566
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,1,1,128,1,float16,float16,65535,0.052155202627182005
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,1,1,128,1,float16,fp8,65535,0.04047040045261383
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,1,1,128,1,float16,float16,131071,0.07462559938430786
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,1,1,128,1,float16,fp8,131071,0.060196799039840695
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,1,1,128,1,float16,float16,1,0.02115039974451065
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,1,1,128,1,float16,fp8,1,0.021779200434684752
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,1,1,128,1,float16,float16,3,0.021030400693416596
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,1,1,128,1,float16,fp8,3,0.02228959947824478
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,1,1,128,1,float16,float16,7,0.02117920070886612
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,1,1,128,1,float16,fp8,7,0.02155199944972992
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,1,1,128,1,float16,float16,15,0.02136480063199997
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,1,1,128,1,float16,fp8,15,0.021908800303936004
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,1,1,128,1,float16,float16,31,0.021217599511146545
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,1,1,128,1,float16,fp8,31,0.021721599996089934
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,1,1,128,1,float16,float16,63,0.02083359956741333
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,1,1,128,1,float16,fp8,63,0.02200160026550293
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,1,1,128,1,float16,float16,127,0.021350400149822236
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,1,1,128,1,float16,fp8,127,0.02204640060663223
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,1,1,128,1,float16,float16,255,0.022697600722312927
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,1,1,128,1,float16,fp8,255,0.0238864004611969
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,1,1,128,1,float16,float16,511,0.025513601303100587
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,1,1,128,1,float16,fp8,511,0.02773439884185791
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,1,1,128,1,float16,float16,1023,0.02805120050907135
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,1,1,128,1,float16,fp8,1023,0.0286080002784729
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,1,1,128,1,float16,float16,2047,0.03396640121936798
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,1,1,128,1,float16,fp8,2047,0.03215200006961823
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,1,1,128,1,float16,float16,4095,0.05095999836921692
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,1,1,128,1,float16,fp8,4095,0.04139040112495422
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,1,1,128,1,float16,float16,8191,0.07368000149726868
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,1,1,128,1,float16,fp8,8191,0.05988159775733948
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,1,1,128,1,float16,float16,16383,0.12120800018310547
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,1,1,128,1,float16,fp8,16383,0.08966079950332642
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,1,1,128,1,float16,float16,32767,0.20798718929290771
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,1,1,128,1,float16,fp8,32767,0.1404639959335327
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,1,1,128,1,float16,float16,65535,0.3837199926376343
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,1,1,128,1,float16,fp8,65535,0.24188799858093263
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,1,1,128,1,float16,float16,1,0.02024320065975189
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,1,1,128,1,float16,fp8,3,0.020750400424003602
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,1,1,128,1,float16,fp8,1,0.020820799469947814
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,1,1,128,1,float16,float16,3,0.02017119973897934
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,1,1,128,1,float16,float16,131071,0.7319712162017822
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,1,1,128,1,float16,float16,7,0.019844800233840942
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,1,1,128,1,float16,fp8,7,0.021028800308704375
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,1,1,128,1,float16,fp8,131071,0.44043841361999514
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,1,1,128,1,float16,float16,15,0.020006400346755982
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,1,1,128,1,float16,fp8,15,0.020923200249671935
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,1,1,128,1,float16,float16,31,0.020168000459671022
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,1,1,128,1,float16,fp8,31,0.02088479995727539
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,1,1,128,1,float16,float16,63,0.019952000677585603
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,1,1,128,1,float16,fp8,63,0.02109919935464859
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,1,1,128,1,float16,float16,127,0.020193600654602052
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,1,1,128,1,float16,fp8,127,0.0209184005856514
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,1,1,128,1,float16,float16,255,0.02173279970884323
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,1,1,128,1,float16,fp8,255,0.022998400032520294
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,1,1,128,1,float16,float16,511,0.0246288001537323
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,1,1,128,1,float16,fp8,2047,0.026910400390625
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,1,1,128,1,float16,fp8,511,0.02669120132923126
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,1,1,128,1,float16,float16,1023,0.02579360008239746
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,1,1,128,1,float16,fp8,1023,0.02687999904155731
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,1,1,128,1,float16,float16,2047,0.025553598999977112
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,1,1,128,1,float16,float16,4095,0.02614560127258301
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,1,1,128,1,float16,fp8,4095,0.02747200131416321
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,1,1,128,1,float16,float16,8191,0.02932479977607727
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,1,1,128,1,float16,fp8,8191,0.02961280047893524
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,1,1,128,1,float16,float16,16383,0.036371201276779175
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,1,1,128,1,float16,fp8,16383,0.036657598614692685
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,1,1,128,1,float16,float16,32767,0.05457119941711426
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,1,1,128,1,float16,fp8,32767,0.04218400120735168
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,1,1,128,1,float16,float16,65535,0.07692800164222717
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,1,1,128,1,float16,fp8,65535,0.06289119720458984
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,1,1,128,1,float16,float16,131071,0.12259360551834106
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,1,1,128,1,float16,fp8,131071,0.09085440039634704
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,1,1,128,1,float16,float16,1,0.02353920042514801
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,1,1,128,1,float16,fp8,1,0.024271999299526215
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,1,1,128,1,float16,float16,3,0.02396959960460663
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,1,1,128,1,float16,fp8,3,0.02476000040769577
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,1,1,128,1,float16,float16,7,0.02346560060977936
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,1,1,128,1,float16,fp8,7,0.024201600253582
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,1,1,128,1,float16,float16,15,0.024054400622844696
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,1,1,128,1,float16,fp8,15,0.024400000274181367
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,1,1,128,1,float16,float16,31,0.023571200668811798
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,1,1,128,1,float16,fp8,31,0.024296000599861145
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,1,1,128,1,float16,float16,63,0.023928000032901763
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,1,1,128,1,float16,fp8,63,0.024697600305080412
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,1,1,128,1,float16,float16,127,0.02361920028924942
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,1,1,128,1,float16,fp8,127,0.02444159984588623
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,1,1,128,1,float16,float16,255,0.025259199738502502
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,1,1,128,1,float16,fp8,255,0.026419198513031004
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,1,1,128,1,float16,float16,511,0.030604800581932066
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,1,1,128,1,float16,fp8,511,0.030115199089050294
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,1,1,128,1,float16,float16,1023,0.05092319846153259
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,1,1,128,1,float16,fp8,1023,0.037857601046562196
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,1,1,128,1,float16,float16,2047,0.07220159769058228
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,1,1,128,1,float16,fp8,2047,0.05801759958267212
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,1,1,128,1,float16,float16,4095,0.11593760251998901
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,1,1,128,1,float16,fp8,4095,0.08339040279388428
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,1,1,128,1,float16,float16,8191,0.20235519409179686
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,1,1,128,1,float16,fp8,8191,0.13695520162582397
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,1,1,128,1,float16,float16,16383,0.37902719974517823
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,1,1,128,1,float16,fp8,16383,0.23482398986816405
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,1,1,128,1,float16,float16,1,0.030582401156425475
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,1,1,128,1,float16,fp8,1,0.03213120102882385
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,1,1,128,1,float16,fp8,32767,0.4304512023925781
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,1,1,128,1,float16,float16,32767,0.7239327907562256
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,1,1,128,1,float16,float16,3,0.030423998832702637
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,1,1,128,1,float16,float16,15,0.03040800094604492
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,1,1,128,1,float16,fp8,3,0.03189919888973236
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,1,1,128,1,float16,float16,7,0.030476799607276915
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,1,1,128,1,float16,fp8,7,0.03201119899749756
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,1,1,128,1,float16,fp8,15,0.032358399033546446
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,1,1,128,1,float16,float16,31,0.03027839958667755
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,1,1,128,1,float16,fp8,65535,0.8288703918457031
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,1,1,128,1,float16,fp8,31,0.03213120102882385
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,1,1,128,1,float16,float16,63,0.030169600248336793
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,1,1,128,1,float16,float16,65535,1.4222991943359375
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,1,1,128,1,float16,fp8,63,0.03214240074157715
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,1,1,128,1,float16,float16,127,0.030324798822402955
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,1,1,128,1,float16,fp8,127,0.032144001126289366
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,1,1,128,1,float16,float16,255,0.034415999054908754
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,1,1,128,1,float16,fp8,255,0.03605599999427796
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,1,1,128,1,float16,float16,511,0.0542959988117218
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,1,1,128,1,float16,fp8,511,0.04582079946994781
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,1,1,128,1,float16,float16,1023,0.07957760095596314
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,1,1,128,1,float16,fp8,1023,0.06655679941177368
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,1,1,128,1,float16,float16,2047,0.12265440225601196
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,1,1,128,1,float16,fp8,2047,0.09028159976005554
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,1,1,128,1,float16,float16,4095,0.20729439258575438
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,1,1,128,1,float16,fp8,4095,0.14143520593643188
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,1,1,128,1,float16,float16,8191,0.3789216041564941
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,1,1,128,1,float16,fp8,8191,0.24509119987487793
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,1,1,128,1,float16,float16,1,0.04410400092601776
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,1,1,128,1,float16,fp8,1,0.04721759855747223
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,1,1,128,1,float16,float16,3,0.044670400023460385
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,1,1,128,1,float16,fp8,16383,0.4398960113525391
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,1,1,128,1,float16,float16,16383,0.7299392223358154
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,1,1,128,1,float16,fp8,3,0.04668959975242615
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,1,1,128,1,float16,float16,31,0.04499039947986603
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,1,1,128,1,float16,float16,7,0.044835200905799864
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,1,1,128,1,float16,fp8,7,0.047619199752807616
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,1,1,128,1,float16,float16,15,0.044387200474739076
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,1,1,128,1,float16,fp8,15,0.04807359874248505
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,1,1,128,1,float16,fp8,31,0.04815999865531921
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,1,1,128,1,float16,float16,63,0.04392000138759613
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,1,1,128,1,float16,fp8,63,0.04831520020961762
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,1,1,128,1,float16,float16,127,0.04486719965934753
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,1,1,128,1,float16,fp8,127,0.04740639925003052
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,1,1,128,1,float16,float16,255,0.06523680090904235
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,1,1,128,1,float16,fp8,255,0.05899360179901123
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,1,1,128,1,float16,float16,511,0.08697599768638611
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,1,1,128,1,float16,fp8,511,0.07638400197029113
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,1,1,128,1,float16,float16,1023,0.13408160209655762
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,1,1,128,1,float16,fp8,1023,0.1068608045578003
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,1,1,128,1,float16,float16,2047,0.21652801036834718
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,1,1,128,1,float16,fp8,2047,0.1547808051109314
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,1,1,128,1,float16,float16,4095,0.38729760646820066
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,1,1,128,1,float16,fp8,4095,0.25728321075439453
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,1,1,128,1,float16,float16,1,0.020315200090408325
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,1,1,128,1,float16,fp8,1,0.020732800662517547
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,1,1,128,1,float16,float16,3,0.020214399695396422
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,1,1,128,1,float16,float16,8191,0.7364848136901856
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,1,1,128,1,float16,fp8,3,0.020822399854660036
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,1,1,128,1,float16,float16,7,0.020454399287700653
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,1,1,128,1,float16,fp8,8191,0.4569744110107422
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,1,1,128,1,float16,fp8,7,0.02109760046005249
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,1,1,128,1,float16,float16,15,0.020449599623680113
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,1,1,128,1,float16,fp8,15,0.021087999641895293
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,1,1,128,1,float16,float16,31,0.020158399641513825
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,1,1,128,1,float16,fp8,31,0.020919999480247496
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,1,1,128,1,float16,float16,63,0.020153599977493285
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,1,1,128,1,float16,fp8,63,0.021356800198554994
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,1,1,128,1,float16,float16,127,0.020203199982643128
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,1,1,128,1,float16,fp8,127,0.02117439955472946
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,1,1,128,1,float16,float16,255,0.02199520021677017
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,1,1,128,1,float16,fp8,255,0.02306720018386841
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,1,1,128,1,float16,float16,511,0.024716800451278685
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,1,1,128,1,float16,fp8,511,0.026425600051879883
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,1,1,128,1,float16,float16,1023,0.025731199979782106
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,1,1,128,1,float16,fp8,1023,0.027643200755119324
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,1,1,128,1,float16,float16,2047,0.025953599810600282
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,1,1,128,1,float16,fp8,2047,0.027143999934196472
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,1,1,128,1,float16,float16,4095,0.027744001150131224
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,1,1,128,1,float16,fp8,4095,0.028190401196479798
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,1,1,128,1,float16,float16,8191,0.03270559906959534
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,1,1,128,1,float16,fp8,8191,0.03294720053672791
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,1,1,128,1,float16,float16,16383,0.0548687994480133
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,1,1,128,1,float16,fp8,16383,0.047835201025009155
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,1,1,128,1,float16,float16,32767,0.0767408013343811
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,1,1,128,1,float16,fp8,32767,0.06349440217018128
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,1,1,128,1,float16,fp8,1,0.07223520278930665
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,1,1,128,1,float16,float16,65535,0.12266720533370971
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,1,1,128,1,float16,fp8,65535,0.09075040221214295
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,1,1,128,1,float16,float16,131071,0.212391996383667
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,1,1,128,1,float16,fp8,7,0.07174239754676819
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,1,1,128,1,float16,float16,15,0.06604959964752197
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,1,1,128,1,float16,float16,1,0.0656607985496521
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,1,1,128,1,float16,fp8,15,0.07159039974212647
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,1,1,128,1,float16,fp8,131071,0.14418400526046754
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,1,1,128,1,float16,float16,3,0.06577600240707397
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,1,1,128,1,float16,fp8,3,0.07206079959869385
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,1,1,128,1,float16,float16,7,0.06658239960670471
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,1,1,128,1,float16,float16,31,0.06613759994506836
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,1,1,128,1,float16,fp8,31,0.07200319766998291
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,1,1,128,1,float16,float16,63,0.06784960031509399
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,1,1,128,1,float16,fp8,63,0.07184640169143677
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,1,1,128,1,float16,float16,127,0.07637760043144226
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,1,1,128,1,float16,fp8,127,0.0762000024318695
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,1,1,128,1,float16,float16,1023,0.22478559017181396
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,1,1,128,1,float16,float16,255,0.09758719801902771
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,1,1,128,1,float16,fp8,255,0.09536479711532593
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,1,1,128,1,float16,float16,511,0.13989280462265014
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,1,1,128,1,float16,fp8,511,0.12609920501708985
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,1,1,128,1,float16,fp8,1023,0.177401602268219
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,1,1,128,1,float16,float16,2047,0.3925071954727173
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,1,1,128,1,float16,fp8,2047,0.2734960079193115
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,1,1,128,1,float16,float16,1,0.10668799877166749
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,1,1,128,1,float16,fp8,1,0.11736479997634888
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,1,1,128,1,float16,float16,3,0.10519520044326783
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,1,1,128,1,float16,fp8,4095,0.47278881072998047
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,1,1,128,1,float16,float16,4095,0.734227180480957
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,1,1,128,1,float16,fp8,3,0.11755520105361938
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,1,1,128,1,float16,float16,7,0.10545439720153808
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,1,1,128,1,float16,fp8,7,0.11878080368041992
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,1,1,128,1,float16,float16,15,0.10572160482406616
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,1,1,128,1,float16,fp8,15,0.11718239784240722
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,1,1,128,1,float16,float16,31,0.10517599582672119
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,1,1,128,1,float16,fp8,31,0.11639679670333862
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,1,1,128,1,float16,float16,63,0.1152608036994934
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,1,1,128,1,float16,fp8,63,0.12203199863433838
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,1,1,128,1,float16,float16,127,0.12118560075759888
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,1,1,128,1,float16,float16,511,0.23134078979492187
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,1,1,128,1,float16,fp8,127,0.12958240509033203
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,1,1,128,1,float16,float16,255,0.15787839889526367
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,1,1,128,1,float16,fp8,255,0.1579311966896057
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,1,1,128,1,float16,fp8,511,0.21950719356536866
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,1,1,128,1,float16,float16,1023,0.3983743906021118
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,1,1,128,1,float16,float16,1,0.02046400010585785
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,1,1,128,1,float16,fp8,1023,0.32279200553894044
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,1,1,128,1,float16,fp8,1,0.021305599808692934
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,1,1,128,1,float16,float16,3,0.02048960030078888
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,1,1,128,1,float16,float16,7,0.0205487996339798
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,1,1,128,1,float16,fp8,2047,0.5124735832214355
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,1,1,128,1,float16,float16,2047,0.7341008186340332
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,1,1,128,1,float16,fp8,3,0.021214400231838227
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,1,1,128,1,float16,fp8,7,0.021393600106239318
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,1,1,128,1,float16,float16,15,0.020420800149440765
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,1,1,128,1,float16,fp8,15,0.021198399364948273
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,1,1,128,1,float16,float16,31,0.020521600544452668
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,1,1,128,1,float16,fp8,31,0.021272000670433045
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,1,1,128,1,float16,fp8,63,0.021028800308704375
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,1,1,128,1,float16,float16,63,0.02014079988002777
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,1,1,128,1,float16,float16,127,0.020510399341583253
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,1,1,128,1,float16,fp8,127,0.021406400203704833
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,1,1,128,1,float16,float16,255,0.022150400280952453
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,1,1,128,1,float16,fp8,255,0.023491199314594268
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,1,1,128,1,float16,float16,2047,0.02781279981136322
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,1,1,128,1,float16,fp8,2047,0.027716800570487976
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,1,1,128,1,float16,float16,511,0.024801599979400634
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,1,1,128,1,float16,fp8,511,0.02693760097026825
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,1,1,128,1,float16,float16,1023,0.025863999128341676
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,1,1,128,1,float16,fp8,1023,0.027744001150131224
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,1,1,128,1,float16,float16,4095,0.031095999479293823
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,1,1,128,1,float16,fp8,4095,0.031667199730873105
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,1,1,128,1,float16,float16,8191,0.05196800231933594
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,1,1,128,1,float16,fp8,8191,0.039771199226379395
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,1,1,128,1,float16,float16,16383,0.07703840136528015
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,1,1,128,1,float16,fp8,65535,0.1415343999862671
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,1,1,128,1,float16,fp8,16383,0.06258879899978638
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,1,1,128,1,float16,float16,32767,0.12033920288085938
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,1,1,128,1,float16,fp8,32767,0.0891152024269104
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,1,1,128,1,float16,float16,65535,0.20749280452728272
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,1,128,1,float16,float16,1,0.022235199809074402
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,1,128,1,float16,fp8,1,0.02324959933757782
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,1,1,128,1,float16,float16,131071,0.38492159843444823
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,1,1,128,1,float16,fp8,131071,0.24214398860931396
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,1,128,1,float16,float16,3,0.022998400032520294
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,1,128,1,float16,float16,31,0.02292799949645996
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,1,128,1,float16,fp8,3,0.022939200699329376
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,1,128,1,float16,float16,7,0.022188800573349
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,1,128,1,float16,fp8,7,0.02300640046596527
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,1,128,1,float16,float16,15,0.02221119999885559
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,1,128,1,float16,fp8,15,0.023160000145435334
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,1,128,1,float16,fp8,31,0.022995199263095855
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,1,128,1,float16,float16,63,0.022436800599098205
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,1,128,1,float16,fp8,511,0.028598400950431823
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,1,128,1,float16,fp8,63,0.023127999901771546
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,1,128,1,float16,float16,127,0.022617599368095397
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,1,128,1,float16,fp8,127,0.023151999711990355
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,1,128,1,float16,float16,255,0.02439039945602417
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,1,128,1,float16,fp8,255,0.025012800097465517
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,1,128,1,float16,float16,511,0.02732959985733032
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,1,128,1,float16,float16,1023,0.045772799849510194
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,1,128,1,float16,fp8,1023,0.035899201035499574
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,1,128,1,float16,float16,2047,0.05836319923400879
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,1,128,1,float16,fp8,2047,0.05094559788703919
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,2,128,1,float16,float16,1,0.02210880070924759
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,2,128,1,float16,fp8,1,0.02306720018386841
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,2,128,1,float16,float16,3,0.021972799301147462
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,2,128,1,float16,fp8,3,0.0229312002658844
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,2,128,1,float16,float16,7,0.022416000068187714
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,2,128,1,float16,fp8,7,0.0230880007147789
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,2,128,1,float16,float16,15,0.022204799950122832
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,2,128,1,float16,fp8,15,0.02290239930152893
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,2,128,1,float16,float16,31,0.022198399901390074
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,2,128,1,float16,fp8,31,0.022868800163269042
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,2,128,1,float16,float16,63,0.022171199321746826
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,2,128,1,float16,fp8,63,0.022947199642658234
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,2,128,1,float16,float16,127,0.02319519966840744
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,2,128,1,float16,fp8,127,0.023012800514698027
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,2,128,1,float16,float16,255,0.02416960000991821
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,2,128,1,float16,fp8,255,0.024878400564193725
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,2,128,1,float16,float16,511,0.03036159873008728
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,2,128,1,float16,fp8,511,0.028547200560569762
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,2,128,1,float16,float16,1023,0.05161280035972595
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,2,128,1,float16,fp8,1023,0.04184159934520722
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,2,128,1,float16,float16,2047,0.07381600141525269
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,2,128,1,float16,fp8,2047,0.05820159912109375
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,4,128,1,float16,float16,1,0.025889599323272706
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,4,128,1,float16,fp8,1,0.02747200131416321
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,4,128,1,float16,float16,3,0.025884801149368288
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,4,128,1,float16,fp8,3,0.02754719853401184
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,4,128,1,float16,float16,7,0.02608320116996765
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,4,128,1,float16,fp8,7,0.02778240144252777
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,4,128,1,float16,float16,15,0.025947201251983642
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,4,128,1,float16,fp8,15,0.027632001042366027
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,4,128,1,float16,float16,31,0.025987198948860167
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,4,128,1,float16,fp8,31,0.027611199021339416
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,4,128,1,float16,float16,63,0.026078400015830994
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,4,128,1,float16,fp8,63,0.027713599801063537
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,4,128,1,float16,float16,127,0.026371198892593383
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,4,128,1,float16,fp8,127,0.02773439884185791
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,4,128,1,float16,float16,255,0.0327567994594574
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,4,128,1,float16,fp8,255,0.031646400690078735
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,4,128,1,float16,float16,511,0.05064799785614014
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,4,128,1,float16,fp8,511,0.042519998550415036
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,4,128,1,float16,float16,1023,0.07839679718017578
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,4,128,1,float16,fp8,1023,0.063401597738266
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,4,128,1,float16,float16,2047,0.12114080190658569
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,4,128,1,float16,fp8,2047,0.08769279718399048
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,8,128,1,float16,float16,1,0.03340800106525421
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,8,128,1,float16,fp8,1,0.03650560081005096
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,8,128,1,float16,float16,3,0.03338080048561096
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,8,128,1,float16,fp8,3,0.03657119870185852
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,8,128,1,float16,float16,7,0.03375999927520752
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,8,128,1,float16,fp8,7,0.036545601487159726
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,8,128,1,float16,float16,15,0.03324800133705139
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,8,128,1,float16,fp8,15,0.03675520122051239
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,8,128,1,float16,float16,31,0.033542400598526
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,8,128,1,float16,fp8,31,0.036559998989105225
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,8,128,1,float16,float16,63,0.03370879888534546
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,8,128,1,float16,fp8,63,0.03696480095386505
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,8,128,1,float16,float16,127,0.03930079936981201
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,8,128,1,float16,fp8,127,0.03651520013809204
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,8,128,1,float16,fp8,1023,0.09601439833641053
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,8,128,1,float16,float16,255,0.056176000833511354
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,8,128,1,float16,fp8,255,0.04870879948139191
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,8,128,1,float16,float16,511,0.0766592025756836
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,8,128,1,float16,fp8,511,0.0674399971961975
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,8,128,1,float16,float16,1023,0.12474720478057862
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,1,128,1,float16,float16,1,0.017785599827766417
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,8,128,1,float16,float16,2047,0.2107151985168457
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,128,8,128,1,float16,fp8,2047,0.1444767951965332
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,1,128,1,float16,fp8,1,0.018561600148677825
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,1,128,1,float16,float16,3,0.017763200402259826
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,1,128,1,float16,fp8,3,0.0183119997382164
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,1,128,1,float16,float16,63,0.01786399930715561
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,1,128,1,float16,float16,7,0.017739200592041017
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,1,128,1,float16,fp8,7,0.018643200397491455
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,1,128,1,float16,float16,15,0.017846399545669557
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,1,128,1,float16,fp8,15,0.018771199882030486
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,1,128,1,float16,float16,31,0.017684799432754517
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,1,128,1,float16,fp8,31,0.018529599905014037
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,1,128,1,float16,fp8,63,0.01844000071287155
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,1,128,1,float16,float16,127,0.01793439984321594
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,1,128,1,float16,fp8,127,0.018620799481868743
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,1,128,1,float16,float16,255,0.019534400105476378
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,1,128,1,float16,fp8,255,0.02059520035982132
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,1,128,1,float16,float16,511,0.022368000447750093
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,1,128,1,float16,fp8,511,0.02459519952535629
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,1,128,1,float16,float16,1023,0.02284640073776245
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,1,128,1,float16,fp8,1023,0.024073599278926848
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,1,128,1,float16,float16,2047,0.02316959947347641
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,1,128,1,float16,fp8,2047,0.02433920055627823
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,2,128,1,float16,float16,1,0.018438400328159334
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,2,128,1,float16,fp8,1,0.019166399538517
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,2,128,1,float16,float16,3,0.018292799592018127
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,2,128,1,float16,fp8,3,0.019097599387168884
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,2,128,1,float16,float16,7,0.018254399299621582
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,2,128,1,float16,fp8,7,0.018963199853897095
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,2,128,1,float16,float16,15,0.0182096004486084
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,2,128,1,float16,fp8,15,0.01906079947948456
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,2,128,1,float16,float16,31,0.0184688001871109
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,2,128,1,float16,fp8,255,0.021227200329303742
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,2,128,1,float16,fp8,31,0.01912800073623657
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,2,128,1,float16,float16,63,0.018423999845981597
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,2,128,1,float16,fp8,63,0.01924320012331009
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,2,128,1,float16,float16,127,0.018182399868965148
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,2,128,1,float16,fp8,127,0.019097599387168884
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,2,128,1,float16,float16,255,0.01988160014152527
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,2,128,1,float16,float16,511,0.022961600124835967
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,2,128,1,float16,fp8,511,0.025084799528121947
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,4,128,1,float16,float16,3,0.019998399913311003
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,2,128,1,float16,float16,1023,0.023148800432682037
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,4,128,1,float16,fp8,3,0.020742399990558623
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,2,128,1,float16,fp8,1023,0.024145600199699403
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,2,128,1,float16,float16,2047,0.023209600150585173
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,2,128,1,float16,fp8,2047,0.02465119957923889
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,4,128,1,float16,float16,1,0.01991039961576462
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,4,128,1,float16,fp8,1,0.020769600570201874
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,4,128,1,float16,float16,7,0.019942399859428406
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,4,128,1,float16,fp8,7,0.020545600354671477
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,4,128,1,float16,float16,15,0.01985439956188202
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,4,128,1,float16,fp8,15,0.020710399746894835
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,4,128,1,float16,float16,31,0.020054399967193604
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,4,128,1,float16,fp8,31,0.02085919976234436
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,4,128,1,float16,float16,63,0.019969600439071655
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,4,128,1,float16,fp8,63,0.020740799605846405
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,4,128,1,float16,float16,127,0.020043200254440306
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,4,128,1,float16,fp8,127,0.020656000077724456
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,4,128,1,float16,float16,255,0.02165919989347458
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,4,128,1,float16,fp8,255,0.022921599447727203
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,4,128,1,float16,float16,511,0.024475200474262236
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,4,128,1,float16,fp8,511,0.02648319900035858
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,4,128,1,float16,float16,1023,0.024379199743270873
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,4,128,1,float16,fp8,1023,0.02559039890766144
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,4,128,1,float16,float16,2047,0.02465279996395111
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,4,128,1,float16,fp8,2047,0.026097598671913146
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,8,128,1,float16,float16,1,0.019823999702930452
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,8,128,1,float16,fp8,1,0.020819200575351714
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,8,128,1,float16,float16,3,0.019787199795246124
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,8,128,1,float16,fp8,3,0.020868800580501556
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,8,128,1,float16,float16,7,0.020183999836444855
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,8,128,1,float16,fp8,7,0.02078240066766739
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,8,128,1,float16,float16,15,0.020095999538898467
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,8,128,1,float16,fp8,15,0.020803199708461763
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,8,128,1,float16,float16,31,0.020068800449371337
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,8,128,1,float16,fp8,31,0.020596800744533537
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,8,128,1,float16,float16,63,0.020075200498104094
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,8,128,1,float16,fp8,63,0.021006399393081666
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,8,128,1,float16,float16,127,0.01993599981069565
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,8,128,1,float16,fp8,127,0.020871999859809875
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,8,128,1,float16,float16,255,0.021700799465179443
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,8,128,1,float16,fp8,255,0.022737599909305573
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,8,128,1,float16,float16,511,0.024366399645805357
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,8,128,1,float16,fp8,511,0.026952001452445983
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,8,128,1,float16,float16,1023,0.024505600333213806
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,8,128,1,float16,fp8,1023,0.025678399205207824
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,8,128,1,float16,float16,2047,0.025036799907684325
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,128,8,128,1,float16,fp8,2047,0.02627040147781372
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,1,128,1,float16,float16,1,0.018531200289726258
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,1,128,1,float16,fp8,1,0.019361600279808044
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,1,128,1,float16,float16,3,0.018452799320220946
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,1,128,1,float16,fp8,3,0.019566400349140166
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,1,128,1,float16,float16,7,0.018518400192260743
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,1,128,1,float16,fp8,7,0.019420799612998963
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,1,128,1,float16,float16,15,0.018532800674438476
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,1,128,1,float16,fp8,15,0.0193792000412941
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,1,128,1,float16,float16,31,0.01844000071287155
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,1,128,1,float16,fp8,31,0.019412800669670105
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,1,128,1,float16,float16,63,0.018811200559139252
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,1,128,1,float16,fp8,63,0.019420799612998963
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,1,128,1,float16,float16,127,0.01866080015897751
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,1,128,1,float16,fp8,127,0.01956160068511963
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,1,128,1,float16,float16,255,0.02019840031862259
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,1,128,1,float16,fp8,255,0.02144480049610138
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,1,128,1,float16,float16,511,0.023107199370861052
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,1,128,1,float16,fp8,511,0.02549920082092285
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,1,128,1,float16,float16,1023,0.02358720004558563
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,1,128,1,float16,fp8,1023,0.025080001354217528
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,1,128,1,float16,float16,2047,0.02398560047149658
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,1,128,1,float16,fp8,2047,0.025183999538421632
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,2,128,1,float16,float16,1,0.0201664000749588
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,2,128,1,float16,fp8,15,0.021027199923992157
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,2,128,1,float16,fp8,1,0.020628799498081208
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,2,128,1,float16,float16,3,0.020126399397850037
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,2,128,1,float16,fp8,3,0.02088160067796707
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,2,128,1,float16,float16,7,0.020204800367355346
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,2,128,1,float16,fp8,7,0.02078240066766739
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,2,128,1,float16,float16,15,0.020131200551986694
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,2,128,1,float16,float16,31,0.02017440050840378
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,2,128,1,float16,fp8,31,0.021227200329303742
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,2,128,1,float16,float16,63,0.019964799284934998
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,2,128,1,float16,fp8,63,0.021014399826526642
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,2,128,1,float16,float16,127,0.020286400616168977
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,2,128,1,float16,fp8,127,0.02088800072669983
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,2,128,1,float16,float16,255,0.021740800142288207
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,2,128,1,float16,fp8,255,0.02293439954519272
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,4,128,1,float16,float16,1,0.020052799582481386
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,2,128,1,float16,float16,511,0.024801599979400634
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,2,128,1,float16,fp8,511,0.026694399118423463
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,2,128,1,float16,float16,1023,0.02499839961528778
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,2,128,1,float16,fp8,1023,0.026158401370048524
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,2,128,1,float16,float16,2047,0.025060799717903138
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,2,128,1,float16,fp8,2047,0.026505601406097413
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,4,128,1,float16,fp8,1,0.020764799416065217
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,4,128,1,float16,float16,3,0.019985599815845488
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,4,128,1,float16,fp8,3,0.02114879935979843
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,4,128,1,float16,float16,7,0.020080000162124634
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,4,128,1,float16,fp8,7,0.020764799416065217
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,4,128,1,float16,float16,15,0.02024960070848465
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,4,128,1,float16,fp8,15,0.02083519995212555
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,4,128,1,float16,float16,31,0.02027200013399124
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,4,128,1,float16,fp8,31,0.020908799767494202
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,4,128,1,float16,float16,63,0.020099200308322906
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,4,128,1,float16,fp8,63,0.021007999777793884
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,4,128,1,float16,float16,127,0.02004159986972809
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,4,128,1,float16,fp8,127,0.020635199546813966
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,4,128,1,float16,float16,2047,0.025303998589515687
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,4,128,1,float16,float16,255,0.02172800004482269
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,4,128,1,float16,fp8,255,0.02295999974012375
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,4,128,1,float16,float16,511,0.024809600412845613
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,4,128,1,float16,fp8,511,0.026419198513031004
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,4,128,1,float16,float16,1023,0.02467840015888214
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,4,128,1,float16,fp8,1023,0.026188799738883974
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,4,128,1,float16,fp8,2047,0.026233598589897156
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,8,128,1,float16,fp8,7,0.020974400639533996
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,8,128,1,float16,float16,1,0.020054399967193604
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,8,128,1,float16,fp8,1,0.021191999316215515
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,8,128,1,float16,float16,3,0.020190399885177613
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,8,128,1,float16,fp8,3,0.021078400313854218
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,8,128,1,float16,float16,7,0.02045599967241287
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,8,128,1,float16,float16,15,0.020052799582481386
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,8,128,1,float16,fp8,15,0.02101919949054718
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,8,128,1,float16,float16,31,0.020121599733829498
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,8,128,1,float16,fp8,31,0.021268799901008606
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,8,128,1,float16,float16,63,0.020155200362205507
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,8,128,1,float16,fp8,63,0.02083359956741333
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,8,128,1,float16,float16,127,0.020110400021076204
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,8,128,1,float16,fp8,127,0.020838400721549986
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,8,128,1,float16,float16,255,0.0221903994679451
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,8,128,1,float16,fp8,255,0.023131200671195985
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,8,128,1,float16,float16,511,0.025206398963928223
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,8,128,1,float16,fp8,511,0.02659359872341156
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,8,128,1,float16,float16,1023,0.02499520033597946
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,8,128,1,float16,fp8,1023,0.0262719988822937
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,8,128,1,float16,float16,2047,0.02667039930820465
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,128,8,128,1,float16,fp8,2047,0.027268800139427184
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,1,128,1,float16,float16,1,0.021460799872875212
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,1,128,1,float16,fp8,1,0.022460800409317017
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,1,128,1,float16,float16,3,0.021580800414085388
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,1,128,1,float16,fp8,3,0.022259199619293214
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,1,128,1,float16,float16,7,0.021428799629211424
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,1,128,1,float16,fp8,7,0.022065599262714387
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,1,128,1,float16,float16,15,0.02157440036535263
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,1,128,1,float16,fp8,15,0.022411200404167175
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,1,128,1,float16,float16,31,0.021558399498462676
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,1,128,1,float16,fp8,31,0.02255360037088394
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,1,128,1,float16,float16,63,0.021457600593566894
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,1,128,1,float16,fp8,63,0.02237759977579117
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,1,128,1,float16,float16,127,0.021457600593566894
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,1,128,1,float16,fp8,127,0.022334399819374084
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,1,128,1,float16,float16,255,0.02325119972229004
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,1,128,1,float16,fp8,255,0.024404799938201903
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,1,128,1,float16,float16,511,0.026280000805854797
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,1,128,1,float16,fp8,511,0.027804800868034364
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,1,128,1,float16,float16,1023,0.0304639995098114
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,1,128,1,float16,fp8,1023,0.03049440085887909
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,1,128,1,float16,float16,2047,0.04138559997081757
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,1,128,1,float16,fp8,2047,0.03455359935760498
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,2,128,1,float16,fp8,1,0.021990400552749634
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,2,128,1,float16,float16,1,0.021380800008773803
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,2,128,1,float16,float16,3,0.021542400121688843
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,2,128,1,float16,fp8,3,0.02213120013475418
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,2,128,1,float16,float16,7,0.021070399880409242
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,2,128,1,float16,fp8,7,0.021955199539661407
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,2,128,1,float16,float16,15,0.02112479954957962
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,2,128,1,float16,fp8,15,0.02211039960384369
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,2,128,1,float16,float16,31,0.021267199516296388
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,2,128,1,float16,fp8,255,0.02404800057411194
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,2,128,1,float16,fp8,31,0.022089600563049316
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,2,128,1,float16,float16,63,0.021388800442218782
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,2,128,1,float16,fp8,63,0.0221343994140625
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,2,128,1,float16,float16,127,0.021473599970340727
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,2,128,1,float16,fp8,127,0.021913599967956544
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,2,128,1,float16,float16,255,0.023240000009536743
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,2,128,1,float16,float16,511,0.026260799169540404
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,2,128,1,float16,fp8,511,0.0276528000831604
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,2,128,1,float16,float16,1023,0.03490079939365387
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,2,128,1,float16,fp8,1023,0.0322735995054245
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,2,128,1,float16,float16,2047,0.05305600166320801
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,2,128,1,float16,fp8,2047,0.04449920058250427
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,4,128,1,float16,float16,1,0.02138720005750656
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,4,128,1,float16,fp8,1,0.022310400009155275
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,4,128,1,float16,float16,3,0.021423999965190888
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,4,128,1,float16,fp8,3,0.022294400632381438
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,4,128,1,float16,float16,7,0.021539199352264404
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,4,128,1,float16,fp8,7,0.02248159945011139
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,4,128,1,float16,float16,15,0.02149759978055954
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,4,128,1,float16,fp8,15,0.022409600019454957
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,4,128,1,float16,float16,31,0.02136159986257553
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,4,128,1,float16,fp8,31,0.0223471999168396
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,4,128,1,float16,fp8,255,0.02450399994850159
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,4,128,1,float16,float16,63,0.02140959948301315
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,4,128,1,float16,fp8,63,0.022307200729846953
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,4,128,1,float16,float16,127,0.021831999719142913
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,4,128,1,float16,fp8,127,0.022679999470710754
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,4,128,1,float16,float16,255,0.02344159930944443
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,4,128,1,float16,float16,511,0.02685439884662628
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,4,128,1,float16,fp8,511,0.028139200806617738
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,4,128,1,float16,float16,1023,0.05008479952812195
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,4,128,1,float16,fp8,1023,0.037990400195121767
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,4,128,1,float16,float16,2047,0.07295039892196656
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,4,128,1,float16,fp8,2047,0.05720000267028809
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,8,128,1,float16,float16,1,0.025571200251579284
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,8,128,1,float16,fp8,1,0.0267984002828598
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,8,128,1,float16,float16,3,0.025016000866889952
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,8,128,1,float16,fp8,3,0.027059200406074523
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,8,128,1,float16,float16,7,0.025209599733352663
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,8,128,1,float16,fp8,7,0.02688640058040619
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,8,128,1,float16,float16,127,0.025577598810195924
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,8,128,1,float16,float16,15,0.025246399641036987
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,8,128,1,float16,fp8,127,0.026929599046707154
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,8,128,1,float16,fp8,15,0.026788800954818726
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,8,128,1,float16,float16,31,0.025537601113319396
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,8,128,1,float16,fp8,31,0.026903998851776124
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,8,128,1,float16,float16,63,0.025089600682258607
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,8,128,1,float16,fp8,63,0.026697599887847902
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,8,128,1,float16,float16,255,0.031083199381828307
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,8,128,1,float16,fp8,255,0.030995199084281923
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,8,128,1,float16,float16,511,0.04919840097427368
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,8,128,1,float16,fp8,511,0.04193919897079468
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,8,128,1,float16,float16,1023,0.07583839893341064
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,8,128,1,float16,fp8,1023,0.06134080290794373
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,1,128,1,float16,float16,1,0.02027679979801178
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,8,128,1,float16,fp8,2047,0.08619199991226197
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,128,8,128,1,float16,float16,2047,0.11765120029449463
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,1,128,1,float16,fp8,7,0.02128479927778244
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,1,128,1,float16,fp8,1,0.021096000075340272
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,1,128,1,float16,fp8,15,0.021238400042057036
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,1,128,1,float16,float16,3,0.020505599677562714
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,1,128,1,float16,fp8,3,0.021279999613761903
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,1,128,1,float16,float16,7,0.020230400562286376
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,1,128,1,float16,float16,15,0.020265600085258482
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,1,128,1,float16,float16,31,0.020372800529003143
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,1,128,1,float16,fp8,31,0.021348799765110015
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,1,128,1,float16,float16,63,0.020315200090408325
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,1,128,1,float16,fp8,63,0.021065600216388702
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,1,128,1,float16,float16,127,0.020377600193023683
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,1,128,1,float16,fp8,127,0.021116800606250763
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,1,128,1,float16,float16,255,0.021966400742530822
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,1,128,1,float16,fp8,255,0.02329760044813156
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,1,128,1,float16,float16,511,0.025361600518226623
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,1,128,1,float16,fp8,511,0.026929599046707154
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,1,128,1,float16,float16,1023,0.025521600246429445
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,1,128,1,float16,fp8,1023,0.02693440020084381
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,1,128,1,float16,float16,2047,0.025944000482559203
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,1,128,1,float16,fp8,2047,0.02732959985733032
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,2,128,1,float16,float16,1,0.02040639966726303
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,2,128,1,float16,fp8,1,0.02111999988555908
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,2,128,1,float16,float16,3,0.02016319930553436
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,2,128,1,float16,fp8,3,0.02106879949569702
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,2,128,1,float16,float16,7,0.020156799256801604
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,2,128,1,float16,fp8,7,0.020684799551963805
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,2,128,1,float16,float16,15,0.020155200362205507
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,2,128,1,float16,fp8,15,0.021238400042057036
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,2,128,1,float16,float16,31,0.0204352006316185
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,2,128,1,float16,fp8,31,0.02112479954957962
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,2,128,1,float16,float16,63,0.020230400562286376
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,2,128,1,float16,fp8,63,0.0210207998752594
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,2,128,1,float16,float16,127,0.020132799446582795
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,2,128,1,float16,fp8,127,0.02111999988555908
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,2,128,1,float16,float16,255,0.021852800250053407
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,2,128,1,float16,fp8,255,0.02327360063791275
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,2,128,1,float16,fp8,2047,0.026795199513435362
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,2,128,1,float16,float16,511,0.024937599897384644
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,2,128,1,float16,fp8,511,0.02661919891834259
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,2,128,1,float16,float16,1023,0.02512960135936737
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,2,128,1,float16,fp8,1023,0.026495999097824095
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,2,128,1,float16,float16,2047,0.02581920027732849
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,4,128,1,float16,float16,1,0.020257599651813507
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,4,128,1,float16,fp8,1,0.02110079973936081
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,4,128,1,float16,float16,3,0.020180800557136537
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,4,128,1,float16,fp8,3,0.021164800226688384
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,4,128,1,float16,float16,7,0.020398400723934174
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,4,128,1,float16,fp8,7,0.020934399962425233
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,4,128,1,float16,float16,15,0.020268799364566804
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,4,128,1,float16,fp8,15,0.020971199870109557
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,4,128,1,float16,float16,31,0.020244799554347992
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,4,128,1,float16,fp8,31,0.021396799385547637
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,4,128,1,float16,float16,63,0.020127999782562255
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,4,128,1,float16,fp8,63,0.02115360051393509
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,4,128,1,float16,float16,127,0.020270399749279022
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,4,128,1,float16,fp8,127,0.02102400064468384
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,4,128,1,float16,float16,255,0.021887999773025513
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,4,128,1,float16,fp8,255,0.02287199944257736
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,4,128,1,float16,float16,511,0.024910399317741395
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,4,128,1,float16,fp8,511,0.02715519964694977
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,4,128,1,float16,float16,1023,0.025222399830818178
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,4,128,1,float16,fp8,1023,0.02637600004673004
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,4,128,1,float16,float16,2047,0.026943999528884887
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,4,128,1,float16,fp8,2047,0.027166399359703063
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,8,128,1,float16,float16,1,0.020457600057125092
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,8,128,1,float16,fp8,1,0.021033599972724915
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,8,128,1,float16,float16,3,0.020304000377655028
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,8,128,1,float16,fp8,3,0.021505600214004515
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,8,128,1,float16,float16,7,0.020427200198173522
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,8,128,1,float16,fp8,7,0.021356800198554994
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,8,128,1,float16,float16,15,0.0205935999751091
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,8,128,1,float16,fp8,15,0.021155199408531188
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,8,128,1,float16,float16,31,0.02051360011100769
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,8,128,1,float16,fp8,31,0.021108800172805788
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,8,128,1,float16,float16,63,0.02014559954404831
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,8,128,1,float16,fp8,63,0.021641600131988525
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,8,128,1,float16,float16,127,0.020423999428749083
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,8,128,1,float16,fp8,127,0.02130240052938461
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,8,128,1,float16,float16,255,0.0222448006272316
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,8,128,1,float16,fp8,255,0.02317280024290085
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,8,128,1,float16,float16,511,0.025356799364089966
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,8,128,1,float16,fp8,511,0.026731199026107787
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,8,128,1,float16,float16,1023,0.026526400446891786
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,8,128,1,float16,fp8,1023,0.026888000965118408
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,8,128,1,float16,float16,2047,0.030151998996734618
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,128,8,128,1,float16,fp8,2047,0.030446401238441466
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,1,128,1,float16,float16,1,0.024209600687026978
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,1,128,1,float16,fp8,1,0.025180798768997193
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,1,128,1,float16,float16,3,0.024556800723075867
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,1,128,1,float16,fp8,3,0.025249600410461426
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,1,128,1,float16,float16,7,0.02415039986371994
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,1,128,1,float16,fp8,7,0.02531839907169342
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,1,128,1,float16,float16,15,0.024409599602222443
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,1,128,1,float16,fp8,15,0.02540639936923981
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,1,128,1,float16,float16,31,0.024038399755954742
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,1,128,1,float16,fp8,31,0.025400000810623168
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,1,128,1,float16,float16,63,0.0247296005487442
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,1,128,1,float16,fp8,63,0.02508319914340973
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,1,128,1,float16,float16,127,0.025440001487731935
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,1,128,1,float16,fp8,127,0.025248000025749208
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,1,128,1,float16,float16,255,0.028172799944877626
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,1,128,1,float16,fp8,255,0.027260801196098326
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,1,128,1,float16,float16,511,0.034160000085830686
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,1,128,1,float16,fp8,511,0.03094879984855652
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,1,128,1,float16,float16,1023,0.05522879958152771
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,2,128,1,float16,fp8,3,0.02933120131492615
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,1,128,1,float16,fp8,1023,0.04648320078849792
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,2,128,1,float16,float16,1,0.027646398544311522
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,2,128,1,float16,fp8,1,0.029411199688911437
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,2,128,1,float16,float16,3,0.02746239900588989
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,2,128,1,float16,float16,7,0.02757120132446289
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,2,128,1,float16,fp8,7,0.0293071985244751
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,2,128,1,float16,float16,15,0.02771199941635132
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,2,128,1,float16,fp8,15,0.029526400566101074
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,2,128,1,float16,float16,31,0.02767840027809143
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,2,128,1,float16,fp8,31,0.029414400458335876
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,2,128,1,float16,float16,63,0.027665600180625916
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,2,128,1,float16,fp8,63,0.02940639853477478
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,2,128,1,float16,float16,127,0.02808000147342682
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,2,128,1,float16,fp8,127,0.029702401161193846
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,2,128,1,float16,float16,255,0.0397599995136261
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,2,128,1,float16,fp8,255,0.03352159857749939
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,2,128,1,float16,float16,511,0.05295680165290832
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,2,128,1,float16,fp8,511,0.0461760014295578
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,2,128,1,float16,float16,1023,0.08155199885368347
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,2,128,1,float16,fp8,1023,0.06700000166893005
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,4,128,1,float16,float16,1,0.03506560027599335
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,4,128,1,float16,fp8,1,0.0384656012058258
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,4,128,1,float16,float16,3,0.03529919981956482
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,4,128,1,float16,fp8,3,0.038503998517990114
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,4,128,1,float16,float16,7,0.03511840105056763
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,4,128,1,float16,fp8,7,0.038185599446296695
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,4,128,1,float16,float16,15,0.03505600094795227
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,4,128,1,float16,fp8,15,0.03851040005683899
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,4,128,1,float16,float16,31,0.03529280126094818
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,4,128,1,float16,fp8,31,0.038731199502944944
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,4,128,1,float16,float16,63,0.03565439879894257
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,4,128,1,float16,fp8,63,0.03823359906673431
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,4,128,1,float16,float16,127,0.04225119948387146
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,4,128,1,float16,fp8,127,0.038332799077034
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,4,128,1,float16,float16,255,0.058499199151992795
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,4,128,1,float16,fp8,255,0.052718400955200195
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,4,128,1,float16,float16,511,0.08077759742736816
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,4,128,1,float16,fp8,511,0.06935679912567139
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,4,128,1,float16,float16,1023,0.12953920364379884
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,4,128,1,float16,fp8,1023,0.0989247977733612
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,8,128,1,float16,float16,1,0.049384000897407535
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,8,128,1,float16,fp8,1,0.05602080225944519
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,8,128,1,float16,float16,3,0.04967199862003326
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,8,128,1,float16,fp8,3,0.05596320033073425
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,8,128,1,float16,float16,7,0.04971520006656647
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,8,128,1,float16,fp8,7,0.055961602926254274
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,8,128,1,float16,float16,15,0.04943200051784515
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,8,128,1,float16,fp8,15,0.05589600205421448
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,8,128,1,float16,fp8,127,0.06237279772758484
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,8,128,1,float16,float16,31,0.04995039999485016
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,8,128,1,float16,fp8,31,0.05589280128479004
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,8,128,1,float16,float16,511,0.13012640476226806
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,8,128,1,float16,float16,63,0.05644959807395935
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,8,128,1,float16,fp8,63,0.0565887987613678
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,8,128,1,float16,float16,127,0.06345120072364807
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,8,128,1,float16,float16,255,0.08708800077438354
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,8,128,1,float16,fp8,255,0.08198879957199097
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,8,128,1,float16,fp8,511,0.11093920469284058
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,1,128,1,float16,float16,1,0.03143360018730164
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,8,128,1,float16,fp8,1023,0.1643903970718384
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,128,8,128,1,float16,float16,1023,0.22195520401000976
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,1,128,1,float16,fp8,1,0.03309600055217743
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,1,128,1,float16,float16,3,0.03138880133628845
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,1,128,1,float16,fp8,3,0.03315680027008057
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,1,128,1,float16,float16,7,0.03101919889450073
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,1,128,1,float16,fp8,7,0.033199998736381534
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,1,128,1,float16,float16,15,0.03136320114135742
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,1,128,1,float16,fp8,15,0.03293280005455017
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,1,128,1,float16,float16,31,0.03100320100784302
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,1,128,1,float16,fp8,31,0.03324320018291473
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,1,128,1,float16,float16,63,0.03224320113658905
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,1,128,1,float16,fp8,63,0.03308959901332855
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,1,128,1,float16,float16,127,0.03383519947528839
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,1,128,1,float16,fp8,127,0.03345440030097961
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,1,128,1,float16,float16,255,0.046928000450134275
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,1,128,1,float16,fp8,255,0.0373663991689682
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,2,128,1,float16,float16,1,0.037992000579833984
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,2,128,1,float16,fp8,1,0.04177440106868744
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,2,128,1,float16,float16,3,0.038145598769187924
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,2,128,1,float16,fp8,3,0.04162240028381348
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,2,128,1,float16,float16,7,0.038046398758888246
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,2,128,1,float16,fp8,7,0.04183999896049499
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,2,128,1,float16,float16,15,0.03795520067214966
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,2,128,1,float16,fp8,15,0.04169439971446991
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,2,128,1,float16,float16,31,0.03874239921569824
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,2,128,1,float16,fp8,31,0.041843199729919435
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,2,128,1,float16,float16,63,0.03936800062656402
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,2,128,1,float16,fp8,63,0.041607999801635744
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,2,128,1,float16,float16,127,0.050627201795578
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,2,128,1,float16,fp8,127,0.0425680011510849
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,2,128,1,float16,float16,255,0.061768001317977904
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,2,128,1,float16,fp8,255,0.05915679931640625
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,4,128,1,float16,float16,1,0.0528544008731842
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,4,128,1,float16,fp8,1,0.058748799562454226
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,4,128,1,float16,float16,3,0.052697598934173584
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,4,128,1,float16,fp8,3,0.05921599864959717
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,4,128,1,float16,float16,7,0.05248640179634094
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,4,128,1,float16,fp8,7,0.059166401624679565
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,4,128,1,float16,float16,15,0.05278239846229553
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,4,128,1,float16,fp8,15,0.05915039777755737
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,4,128,1,float16,float16,31,0.05262560248374939
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,4,128,1,float16,fp8,31,0.05909919738769531
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,4,128,1,float16,float16,63,0.06223679780960083
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,4,128,1,float16,fp8,63,0.06023520231246948
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,4,128,1,float16,float16,127,0.06755040287971496
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,4,128,1,float16,fp8,127,0.07088800072669983
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,4,128,1,float16,float16,255,0.08803039789199829
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,8,128,1,float16,fp8,3,0.09455519914627075
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,4,128,1,float16,fp8,255,0.08528800010681152
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,8,128,1,float16,float16,1,0.08104159832000732
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,8,128,1,float16,fp8,1,0.09463199973106384
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,8,128,1,float16,float16,3,0.08077600002288818
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,8,128,1,float16,float16,31,0.08826239705085755
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,8,128,1,float16,float16,7,0.08127040266990662
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,8,128,1,float16,fp8,7,0.09496480226516724
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,8,128,1,float16,float16,15,0.08152639865875244
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,8,128,1,float16,float16,127,0.09773120284080505
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,8,128,1,float16,fp8,15,0.09412800073623658
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,8,128,1,float16,fp8,31,0.09463840126991271
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,8,128,1,float16,float16,63,0.09120000004768372
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,8,128,1,float16,fp8,63,0.10451359748840332
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,8,128,1,float16,fp8,127,0.11414719820022583
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,8,128,1,float16,float16,255,0.14495840072631835
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,128,8,128,1,float16,fp8,255,0.13726719617843627
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,1,128,1,float16,float16,1,0.04806239902973175
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,1,128,1,float16,fp8,1,0.049476799368858335
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,1,128,1,float16,float16,3,0.04836640059947968
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,1,128,1,float16,fp8,3,0.04994400143623352
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,1,128,1,float16,float16,7,0.04979679882526398
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,1,128,1,float16,float16,63,0.05783680081367493
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,1,128,1,float16,fp8,63,0.05326399803161621
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,1,128,1,float16,fp8,7,0.050100797414779664
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,1,128,1,float16,float16,127,0.06220800280570984
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,1,128,1,float16,float16,15,0.05161759853363037
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,1,128,1,float16,fp8,15,0.050835198163986205
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,1,128,1,float16,float16,31,0.04993120133876801
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,1,128,1,float16,fp8,31,0.05091680288314819
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,1,128,1,float16,fp8,127,0.05852800011634827
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,2,128,1,float16,float16,1,0.06371679902076721
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,2,128,1,float16,fp8,1,0.06779360175132751
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,2,128,1,float16,float16,3,0.06308000087738037
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,2,128,1,float16,fp8,3,0.06701920032501221
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,2,128,1,float16,float16,7,0.06388959884643555
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,2,128,1,float16,fp8,7,0.06658719778060913
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,2,128,1,float16,float16,15,0.06461600065231324
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,2,128,1,float16,fp8,15,0.06702560186386108
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,2,128,1,float16,float16,31,0.06786879897117615
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,2,128,1,float16,fp8,31,0.06962559819221496
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,2,128,1,float16,float16,63,0.07323520183563233
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,2,128,1,float16,fp8,63,0.07521119713783264
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,2,128,1,float16,float16,127,0.07920479774475098
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,2,128,1,float16,fp8,127,0.07880319952964783
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,4,128,1,float16,float16,1,0.09281439781188965
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,4,128,1,float16,fp8,1,0.10180480480194092
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,4,128,1,float16,float16,3,0.09475679993629456
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,4,128,1,float16,fp8,3,0.1018064022064209
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,4,128,1,float16,float16,7,0.0944703996181488
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,4,128,1,float16,fp8,7,0.1024783968925476
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,4,128,1,float16,float16,15,0.09673759937286378
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,4,128,1,float16,fp8,15,0.10208799839019775
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,4,128,1,float16,float16,31,0.0997983992099762
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,4,128,1,float16,fp8,31,0.1066864013671875
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,8,128,1,float16,fp8,1,0.16917920112609863
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,4,128,1,float16,float16,63,0.10200159549713135
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,4,128,1,float16,fp8,63,0.11409599781036377
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,8,128,1,float16,float16,7,0.15280640125274658
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,4,128,1,float16,float16,127,0.11016000509262085
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,4,128,1,float16,fp8,127,0.11711839437484742
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,8,128,1,float16,float16,1,0.15392639636993408
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,8,128,1,float16,float16,3,0.15348479747772217
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,8,128,1,float16,fp8,3,0.16948000192642212
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,8,128,1,float16,fp8,7,0.17132480144500734
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,8,128,1,float16,float16,15,0.15695359706878662
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,8,128,1,float16,fp8,15,0.1731824040412903
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,8,128,1,float16,float16,31,0.15922240018844605
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,8,128,1,float16,fp8,31,0.18303040266036988
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,8,128,1,float16,float16,63,0.16044800281524657
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,8,128,1,float16,fp8,63,0.18693759441375732
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,8,128,1,float16,float16,127,0.1709663987159729
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,128,8,128,1,float16,fp8,127,0.2017807960510254
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,1,128,1,float16,float16,1,0.020703999698162077
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,1,128,1,float16,fp8,1,0.02131199985742569
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,1,128,1,float16,float16,3,0.02064319998025894
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,1,128,1,float16,fp8,3,0.021216000616550445
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,1,128,1,float16,float16,7,0.020659199357032774
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,1,128,1,float16,fp8,7,0.0217056006193161
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,1,128,1,float16,float16,15,0.02056960016489029
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,1,128,1,float16,fp8,15,0.021529600024223328
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,1,128,1,float16,float16,31,0.02043039947748184
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,1,128,1,float16,fp8,31,0.02129279971122742
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,1,128,1,float16,float16,63,0.020606400072574617
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,1,128,1,float16,fp8,63,0.021324799954891206
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,1,128,1,float16,float16,127,0.02105119973421097
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,1,128,1,float16,fp8,127,0.021641600131988525
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,1,128,1,float16,float16,255,0.022439999878406523
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,1,128,1,float16,fp8,255,0.023227199912071228
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,1,128,1,float16,float16,511,0.025390401482582092
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,1,128,1,float16,fp8,511,0.02696479856967926
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,1,128,1,float16,float16,1023,0.025979200005531312
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,1,128,1,float16,fp8,1023,0.027635198831558228
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,1,128,1,float16,float16,2047,0.02706719934940338
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,1,128,1,float16,fp8,2047,0.027432000637054442
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,2,128,1,float16,fp8,7,0.021400000154972076
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,2,128,1,float16,float16,1,0.020417599380016326
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,2,128,1,float16,fp8,1,0.02133280038833618
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,2,128,1,float16,float16,3,0.020476800203323365
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,2,128,1,float16,fp8,3,0.02107200026512146
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,2,128,1,float16,float16,7,0.020691199600696562
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,2,128,1,float16,float16,15,0.02056960016489029
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,2,128,1,float16,fp8,15,0.02141599953174591
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,2,128,1,float16,float16,31,0.020483200252056123
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,2,128,1,float16,fp8,31,0.021488000452518464
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,2,128,1,float16,float16,63,0.02035840004682541
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,2,128,1,float16,fp8,63,0.02110079973936081
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,2,128,1,float16,float16,127,0.020684799551963805
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,2,128,1,float16,fp8,127,0.021512000262737273
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,2,128,1,float16,float16,255,0.022273600101470947
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,2,128,1,float16,fp8,255,0.023177599906921385
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,2,128,1,float16,float16,511,0.02515679895877838
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,2,128,1,float16,fp8,511,0.02699199914932251
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,2,128,1,float16,float16,1023,0.025670400261878966
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,2,128,1,float16,fp8,1023,0.026817598938941957
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,2,128,1,float16,float16,2047,0.027456000447273254
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,2,128,1,float16,fp8,2047,0.027619200944900512
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,4,128,1,float16,float16,1,0.020369599759578704
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,4,128,1,float16,fp8,1,0.021350400149822236
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,4,128,1,float16,float16,3,0.02066880017518997
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,4,128,1,float16,fp8,3,0.02118239998817444
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,4,128,1,float16,float16,7,0.020534400641918183
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,4,128,1,float16,fp8,7,0.021583999693393707
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,4,128,1,float16,float16,15,0.020695999264717102
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,4,128,1,float16,fp8,15,0.021566399931907655
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,4,128,1,float16,float16,31,0.020476800203323365
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,4,128,1,float16,fp8,31,0.02138399928808212
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,4,128,1,float16,float16,63,0.020870399475097657
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,4,128,1,float16,fp8,63,0.02162880003452301
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,4,128,1,float16,float16,127,0.02093279957771301
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,4,128,1,float16,fp8,127,0.02168480008840561
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,4,128,1,float16,float16,255,0.022524799406528472
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,4,128,1,float16,fp8,255,0.023555199801921844
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,4,128,1,float16,float16,511,0.02504960000514984
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,4,128,1,float16,fp8,511,0.02709600031375885
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,4,128,1,float16,float16,1023,0.026771199703216553
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,4,128,1,float16,fp8,1023,0.02697120010852814
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,4,128,1,float16,float16,2047,0.029558399319648744
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,4,128,1,float16,fp8,2047,0.030633598566055298
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,8,128,1,float16,float16,1,0.020761600136756896
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,8,128,1,float16,fp8,1,0.021588799357414246
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,8,128,1,float16,float16,3,0.02064319998025894
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,8,128,1,float16,fp8,3,0.02147040069103241
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,8,128,1,float16,float16,7,0.020627200603485107
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,8,128,1,float16,fp8,7,0.021615999937057494
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,8,128,1,float16,float16,15,0.020868800580501556
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,8,128,1,float16,fp8,15,0.021615999937057494
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,8,128,1,float16,float16,31,0.020843200385570526
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,8,128,1,float16,fp8,31,0.0217631995677948
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,8,128,1,float16,float16,63,0.020636799931526183
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,8,128,1,float16,fp8,63,0.021473599970340727
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,8,128,1,float16,float16,127,0.020875200629234314
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,8,128,1,float16,fp8,127,0.021524800360202788
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,8,128,1,float16,float16,255,0.022550399601459502
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,8,128,1,float16,fp8,255,0.023659199476242065
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,8,128,1,float16,float16,511,0.02571359872817993
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,8,128,1,float16,fp8,511,0.02724800109863281
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,8,128,1,float16,float16,1023,0.030036801099777223
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,8,128,1,float16,fp8,1023,0.03060320019721985
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,8,128,1,float16,float16,2047,0.05030879974365234
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,128,8,128,1,float16,fp8,2047,0.04008159935474396
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,1,128,1,float16,float16,1,0.08318719863891602
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,1,128,1,float16,fp8,1,0.08611199855804444
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,1,128,1,float16,float16,3,0.08307200074195861
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,1,128,1,float16,fp8,3,0.08588160276412964
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,1,128,1,float16,float16,7,0.08414400219917298
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,1,128,1,float16,fp8,7,0.08613759875297547
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,1,128,1,float16,float16,15,0.0844223976135254
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,1,128,1,float16,fp8,15,0.08658559918403626
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,1,128,1,float16,float16,31,0.085452800989151
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,1,128,1,float16,fp8,31,0.08732479810714722
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,1,128,1,float16,float16,63,0.08834559917449951
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,1,128,1,float16,fp8,63,0.08909119963645935
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,2,128,1,float16,float16,1,0.10875680446624755
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,2,128,1,float16,fp8,1,0.12072479724884033
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,2,128,1,float16,float16,3,0.1101151943206787
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,2,128,1,float16,fp8,3,0.12072960138320923
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,2,128,1,float16,float16,7,0.1098207950592041
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,2,128,1,float16,fp8,7,0.12030240297317504
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,2,128,1,float16,float16,15,0.11038080453872681
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,2,128,1,float16,fp8,15,0.12055679559707641
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,2,128,1,float16,float16,31,0.11237280368804932
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,2,128,1,float16,fp8,31,0.12200319766998291
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,2,128,1,float16,float16,63,0.11617759466171265
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,2,128,1,float16,fp8,63,0.1251423954963684
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,4,128,1,float16,float16,1,0.16570719480514526
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,4,128,1,float16,fp8,1,0.18971519470214843
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,4,128,1,float16,float16,3,0.16535840034484864
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,4,128,1,float16,fp8,3,0.19050559997558594
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,4,128,1,float16,float16,7,0.16581759452819825
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,4,128,1,float16,fp8,7,0.19037920236587524
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,4,128,1,float16,float16,15,0.16671680212020873
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,4,128,1,float16,fp8,15,0.19094719886779785
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,4,128,1,float16,float16,31,0.168612802028656
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,4,128,1,float16,fp8,31,0.19137439727783204
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,4,128,1,float16,float16,63,0.17221759557723998
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,4,128,1,float16,fp8,63,0.1972815990447998
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,8,128,1,float16,float16,1,0.2868527889251709
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,8,128,1,float16,fp8,1,0.33748319149017336
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,8,128,1,float16,float16,3,0.2863487958908081
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,8,128,1,float16,fp8,3,0.33628480434417723
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,8,128,1,float16,fp8,15,0.3378096103668213
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,8,128,1,float16,float16,7,0.2851599931716919
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,8,128,1,float16,fp8,7,0.3392064094543457
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,8,128,1,float16,fp8,31,0.34019999504089354
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,8,128,1,float16,float16,15,0.28725600242614746
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,8,128,1,float16,float16,31,0.29105439186096194
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,8,128,1,float16,float16,63,0.2929088115692139
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,1,128,1,float16,float16,1,0.13197120428085327
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,1,128,1,float16,float16,7,0.13352320194244385
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,128,8,128,1,float16,fp8,63,0.34363999366760256
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,1,128,1,float16,fp8,1,0.14184319972991943
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,1,128,1,float16,float16,3,0.13292160034179687
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,1,128,1,float16,float16,31,0.1355728030204773
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,1,128,1,float16,fp8,3,0.1410207986831665
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,1,128,1,float16,fp8,7,0.14086079597473145
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,1,128,1,float16,float16,15,0.1343600034713745
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,1,128,1,float16,fp8,15,0.1427343964576721
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,1,128,1,float16,fp8,31,0.1430191993713379
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,2,128,1,float16,float16,1,0.1851631999015808
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,2,128,1,float16,fp8,1,0.20738561153411866
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,2,128,1,float16,float16,3,0.18535840511322021
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,2,128,1,float16,fp8,3,0.20685598850250245
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,2,128,1,float16,float16,7,0.18522559404373168
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,2,128,1,float16,fp8,7,0.2077552080154419
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,2,128,1,float16,float16,15,0.18664319515228273
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,2,128,1,float16,fp8,15,0.20907840728759766
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,2,128,1,float16,float16,31,0.18787839412689208
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,2,128,1,float16,fp8,31,0.21039841175079346
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,4,128,1,float16,float16,1,0.29871358871459963
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,4,128,1,float16,fp8,1,0.34749760627746584
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,4,128,1,float16,float16,3,0.2980416059494019
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,4,128,1,float16,fp8,3,0.34818079471588137
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,4,128,1,float16,float16,7,0.2993504047393799
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,4,128,1,float16,fp8,7,0.34838879108428955
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,4,128,1,float16,float16,15,0.3005183935165405
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,4,128,1,float16,fp8,15,0.3507983922958374
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,4,128,1,float16,float16,31,0.30379519462585447
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,4,128,1,float16,fp8,31,0.35268800258636473
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,8,128,1,float16,float16,1,0.534715223312378
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,8,128,1,float16,fp8,1,0.6312992095947265
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,8,128,1,float16,float16,3,0.5359856128692627
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,8,128,1,float16,float16,15,0.5370816230773926
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,8,128,1,float16,fp8,3,0.6319536209106446
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,8,128,1,float16,float16,7,0.5363647937774658
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,8,128,1,float16,fp8,7,0.6330543994903565
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,1,128,1,float16,float16,1,0.02056639939546585
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,8,128,1,float16,fp8,15,0.6317808151245117
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,8,128,1,float16,float16,31,0.5400656223297119
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,1,128,1,float16,fp8,1,0.021480000019073485
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,1,128,1,float16,float16,3,0.020606400072574617
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,128,8,128,1,float16,fp8,31,0.6350560188293457
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,1,128,1,float16,fp8,3,0.02159679979085922
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,1,128,1,float16,float16,7,0.020868800580501556
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,1,128,1,float16,fp8,7,0.02128479927778244
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,1,128,1,float16,float16,15,0.02060000002384186
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,1,128,1,float16,fp8,15,0.021428799629211424
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,1,128,1,float16,float16,31,0.02052319943904877
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,1,128,1,float16,fp8,31,0.021380800008773803
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,1,128,1,float16,float16,63,0.02060000002384186
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,1,128,1,float16,fp8,63,0.021609599888324737
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,1,128,1,float16,float16,127,0.02129279971122742
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,1,128,1,float16,fp8,127,0.02149440050125122
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,1,128,1,float16,float16,255,0.022494399547576906
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,1,128,1,float16,fp8,255,0.023550400137901308
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,1,128,1,float16,float16,511,0.0251120001077652
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,1,128,1,float16,fp8,511,0.027158400416374205
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,1,128,1,float16,float16,1023,0.026859200000762938
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,1,128,1,float16,fp8,1023,0.027926400303840637
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,1,128,1,float16,float16,2047,0.02881920039653778
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,1,128,1,float16,fp8,2047,0.02969920039176941
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,2,128,1,float16,float16,1,0.020311999320983886
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,2,128,1,float16,fp8,1,0.021268799901008606
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,2,128,1,float16,float16,3,0.02046239972114563
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,2,128,1,float16,fp8,3,0.021320000290870667
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,2,128,1,float16,float16,7,0.02024960070848465
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,2,128,1,float16,fp8,7,0.021631999313831328
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,2,128,1,float16,float16,15,0.02080959975719452
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,2,128,1,float16,fp8,15,0.021531200408935545
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,2,128,1,float16,float16,31,0.020633600652217865
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,2,128,1,float16,fp8,31,0.021227200329303742
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,2,128,1,float16,float16,63,0.020720000565052032
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,2,128,1,float16,fp8,63,0.02099999934434891
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,2,128,1,float16,float16,127,0.02099840044975281
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,2,128,1,float16,fp8,127,0.021687999367713928
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,2,128,1,float16,float16,255,0.02265920042991638
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,2,128,1,float16,fp8,255,0.023499199748039247
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,2,128,1,float16,float16,511,0.025459200143814087
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,2,128,1,float16,fp8,511,0.027140799164772033
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,2,128,1,float16,float16,1023,0.02797439992427826
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,2,128,1,float16,fp8,1023,0.027745598554611207
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,2,128,1,float16,float16,2047,0.03347519934177399
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,2,128,1,float16,fp8,2047,0.03142080008983612
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,4,128,1,float16,float16,1,0.0208079993724823
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,4,128,1,float16,fp8,1,0.021631999313831328
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,4,128,1,float16,float16,3,0.02070239931344986
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,4,128,1,float16,fp8,3,0.02143840044736862
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,4,128,1,float16,float16,7,0.02064639925956726
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,4,128,1,float16,fp8,7,0.021590399742126464
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,4,128,1,float16,float16,15,0.02067520022392273
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,4,128,1,float16,fp8,15,0.021558399498462676
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,4,128,1,float16,float16,31,0.02085600048303604
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,4,128,1,float16,fp8,31,0.021550400555133818
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,4,128,1,float16,float16,63,0.020652799308300017
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,4,128,1,float16,fp8,63,0.021697600185871125
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,4,128,1,float16,float16,127,0.021014399826526642
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,4,128,1,float16,fp8,127,0.021563200652599333
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,4,128,1,float16,float16,255,0.02266719937324524
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,4,128,1,float16,fp8,255,0.023926399648189545
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,4,128,1,float16,float16,511,0.0257999986410141
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,4,128,1,float16,fp8,511,0.027131199836730957
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,4,128,1,float16,float16,1023,0.030107200145721436
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,4,128,1,float16,fp8,1023,0.030396801233291627
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,4,128,1,float16,float16,2047,0.05099200010299683
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,4,128,1,float16,fp8,2047,0.041222399473190306
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,8,128,1,float16,float16,1,0.02114560008049011
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,8,128,1,float16,fp8,1,0.02184319943189621
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,8,128,1,float16,float16,3,0.020712000131607056
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,8,128,1,float16,fp8,3,0.02186720073223114
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,8,128,1,float16,float16,7,0.02112800031900406
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,8,128,1,float16,fp8,7,0.021964800357818604
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,8,128,1,float16,float16,15,0.021129600703716278
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,8,128,1,float16,fp8,15,0.021854400634765625
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,8,128,1,float16,float16,31,0.021294400095939636
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,8,128,1,float16,fp8,31,0.021796800196170807
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,8,128,1,float16,float16,63,0.02095839977264404
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,8,128,1,float16,fp8,63,0.021859200298786165
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,8,128,1,float16,float16,127,0.021356800198554994
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,8,128,1,float16,fp8,127,0.02200320065021515
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,8,128,1,float16,float16,255,0.022935999929904936
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,8,128,1,float16,fp8,255,0.024135999381542206
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,8,128,1,float16,float16,511,0.029702401161193846
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,8,128,1,float16,fp8,511,0.027809599041938783
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,8,128,1,float16,float16,1023,0.04875200092792511
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,8,128,1,float16,fp8,1023,0.03750559985637665
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,8,128,1,float16,float16,2047,0.07067040205001832
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,128,8,128,1,float16,fp8,2047,0.05665919780731201
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,1,128,1,float16,float16,1,0.022067199647426605
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,1,128,1,float16,fp8,1,0.023155200481414794
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,1,128,1,float16,float16,3,0.02215359956026077
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,1,128,1,float16,fp8,3,0.022793599963188173
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,1,128,1,float16,float16,7,0.022148799896240235
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,1,128,1,float16,fp8,7,0.022945599257946016
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,1,128,1,float16,float16,15,0.022375999391078948
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,1,128,1,float16,fp8,15,0.022832000255584718
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,1,128,1,float16,float16,31,0.02205120027065277
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,1,128,1,float16,fp8,31,0.02326720058917999
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,1,128,1,float16,float16,63,0.022468799352645875
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,1,128,1,float16,fp8,63,0.023207999765872955
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,1,128,1,float16,float16,127,0.02245119959115982
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,1,128,1,float16,fp8,127,0.022988800704479218
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,1,128,1,float16,float16,255,0.024505600333213806
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,1,128,1,float16,fp8,255,0.025124800205230714
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,1,128,1,float16,float16,511,0.027246400713920593
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,1,128,1,float16,fp8,511,0.029022398591041564
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,1,128,1,float16,float16,1023,0.042217600345611575
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,1,128,1,float16,fp8,1023,0.03541440069675446
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,1,128,1,float16,float16,2047,0.05617920160293579
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,2,128,1,float16,fp8,7,0.023292799293994904
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,1,128,1,float16,fp8,2047,0.04871360063552856
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,2,128,1,float16,float16,1,0.02218240052461624
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,2,128,1,float16,fp8,1,0.023183999955654143
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,2,128,1,float16,float16,3,0.022115199267864226
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,2,128,1,float16,fp8,3,0.023151999711990355
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,2,128,1,float16,float16,7,0.02234880030155182
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,2,128,1,float16,float16,15,0.022140799462795256
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,2,128,1,float16,fp8,15,0.023321600258350374
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,2,128,1,float16,float16,31,0.022163200378417968
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,2,128,1,float16,fp8,31,0.023153600096702576
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,2,128,1,float16,float16,63,0.022195200622081756
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,2,128,1,float16,fp8,63,0.02311840057373047
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,2,128,1,float16,float16,127,0.022881600260734557
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,2,128,1,float16,fp8,127,0.023268799483776092
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,2,128,1,float16,float16,255,0.024691200256347655
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,2,128,1,float16,fp8,255,0.025464001297950744
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,2,128,1,float16,float16,511,0.029787200689315795
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,2,128,1,float16,fp8,511,0.02905920147895813
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,2,128,1,float16,float16,1023,0.05139039754867554
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,2,128,1,float16,fp8,1023,0.04117439985275269
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,2,128,1,float16,float16,2047,0.07279840111732483
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,2,128,1,float16,fp8,2047,0.05763999819755554
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,4,128,1,float16,float16,1,0.026092800498008727
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,4,128,1,float16,fp8,1,0.027566400170326234
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,4,128,1,float16,float16,3,0.02595840096473694
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,4,128,1,float16,fp8,3,0.02765600085258484
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,4,128,1,float16,float16,7,0.026187199354171752
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,4,128,1,float16,fp8,7,0.02768000066280365
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,4,128,1,float16,float16,15,0.026196798682212828
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,4,128,1,float16,fp8,15,0.027651199698448183
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,4,128,1,float16,float16,31,0.02609440088272095
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,4,128,1,float16,fp8,31,0.027683201432228088
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,4,128,1,float16,float16,63,0.02611680030822754
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,4,128,1,float16,fp8,63,0.027764800190925597
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,4,128,1,float16,float16,127,0.02635039985179901
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,4,128,1,float16,fp8,127,0.027793601155281067
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,4,128,1,float16,float16,255,0.03191039860248566
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,4,128,1,float16,fp8,255,0.031774398684501645
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,4,128,1,float16,float16,511,0.05034080147743225
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,4,128,1,float16,fp8,511,0.042921599745750424
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,4,128,1,float16,float16,1023,0.07751200199127198
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,4,128,1,float16,fp8,1023,0.06359519958496093
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,8,128,1,float16,float16,3,0.03371999859809875
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,8,128,1,float16,fp8,3,0.03687039911746979
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,8,128,1,float16,float16,7,0.0338591992855072
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,8,128,1,float16,fp8,7,0.0366784006357193
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,4,128,1,float16,float16,2047,0.12061599493026734
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,4,128,1,float16,fp8,2047,0.08785120248794556
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,8,128,1,float16,float16,1,0.03367680013179779
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,8,128,1,float16,fp8,1,0.03660959899425507
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,8,128,1,float16,float16,15,0.0339792013168335
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,8,128,1,float16,fp8,15,0.03687680065631867
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,8,128,1,float16,float16,31,0.03355199992656708
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,8,128,1,float16,fp8,31,0.03674240112304687
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,8,128,1,float16,float16,63,0.033929601311683655
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,8,128,1,float16,fp8,63,0.03663359880447388
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,8,128,1,float16,float16,127,0.036881598830223086
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,8,128,1,float16,fp8,127,0.03685919940471649
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,8,128,1,float16,float16,255,0.055452799797058104
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,8,128,1,float16,fp8,255,0.047891199588775635
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,8,128,1,float16,float16,511,0.0765775978565216
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,8,128,1,float16,fp8,511,0.06686879992485047
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,8,128,1,float16,float16,1023,0.12341760396957398
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,8,128,1,float16,fp8,1023,0.09571679830551147
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,8,128,1,float16,float16,2047,0.20939040184020996
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,1,128,1,float16,float16,1,0.0174127995967865
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,96,8,128,1,float16,fp8,2047,0.14579999446868896
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,1,128,1,float16,fp8,1,0.018369600176811218
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,1,128,1,float16,float16,3,0.017611199617385866
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,1,128,1,float16,fp8,3,0.018320000171661376
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,1,128,1,float16,float16,7,0.01759999990463257
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,1,128,1,float16,fp8,7,0.01838880032300949
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,1,128,1,float16,float16,15,0.017791999876499175
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,1,128,1,float16,fp8,15,0.01841759979724884
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,1,128,1,float16,float16,31,0.01765599995851517
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,1,128,1,float16,fp8,31,0.01849599927663803
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,1,128,1,float16,float16,63,0.017726400494575502
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,1,128,1,float16,fp8,63,0.01839040070772171
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,1,128,1,float16,float16,127,0.01758880019187927
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,1,128,1,float16,fp8,127,0.018492799997329713
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,1,128,1,float16,float16,255,0.019310399889945984
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,1,128,1,float16,fp8,255,0.020351999998092653
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,1,128,1,float16,float16,511,0.022438399493694305
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,1,128,1,float16,fp8,511,0.024374400079250336
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,1,128,1,float16,float16,1023,0.022401599586009978
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,1,128,1,float16,fp8,1023,0.02388480007648468
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,1,128,1,float16,float16,2047,0.02284799963235855
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,1,128,1,float16,fp8,2047,0.024278399348258973
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,2,128,1,float16,float16,1,0.018415999412536622
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,2,128,1,float16,fp8,1,0.019100800156593323
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,2,128,1,float16,float16,3,0.01834239959716797
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,2,128,1,float16,fp8,31,0.0192671999335289
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,2,128,1,float16,fp8,3,0.019057600200176238
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,2,128,1,float16,float16,7,0.018291200697422027
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,2,128,1,float16,fp8,7,0.01900160014629364
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,2,128,1,float16,float16,15,0.018199999630451203
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,2,128,1,float16,fp8,15,0.01921280026435852
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,2,128,1,float16,float16,31,0.01836320012807846
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,2,128,1,float16,float16,63,0.018438400328159334
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,2,128,1,float16,fp8,63,0.01918880045413971
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,2,128,1,float16,float16,127,0.01823039948940277
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,2,128,1,float16,fp8,127,0.019094400107860565
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,2,128,1,float16,float16,255,0.01973759979009628
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,2,128,1,float16,fp8,255,0.021147200465202333
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,2,128,1,float16,float16,511,0.022963200509548188
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,2,128,1,float16,fp8,511,0.024984000623226164
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,2,128,1,float16,float16,1023,0.022891199588775633
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,2,128,1,float16,fp8,1023,0.02431039959192276
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,2,128,1,float16,float16,2047,0.023057599365711213
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,2,128,1,float16,fp8,2047,0.02447360008955002
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,4,128,1,float16,float16,1,0.019704000651836397
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,4,128,1,float16,fp8,1,0.020720000565052032
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,4,128,1,float16,float16,3,0.020019200444221497
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,4,128,1,float16,fp8,3,0.0208639994263649
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,4,128,1,float16,float16,7,0.019998399913311003
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,4,128,1,float16,fp8,7,0.02082560062408447
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,4,128,1,float16,float16,15,0.01987359970808029
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,4,128,1,float16,fp8,15,0.020735999941825865
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,4,128,1,float16,float16,31,0.019702400267124175
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,4,128,1,float16,fp8,31,0.02099200040102005
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,4,128,1,float16,float16,63,0.019888000190258028
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,4,128,1,float16,fp8,63,0.020788800716400147
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,4,128,1,float16,float16,127,0.01988479942083359
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,4,128,1,float16,fp8,127,0.020623999834060668
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,4,128,1,float16,float16,255,0.02147520035505295
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,4,128,1,float16,fp8,255,0.022537599503993987
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,4,128,1,float16,float16,511,0.024646399915218352
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,4,128,1,float16,fp8,511,0.026606398820877075
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,4,128,1,float16,float16,1023,0.024371199309825897
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,4,128,1,float16,fp8,1023,0.025968000292778015
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,4,128,1,float16,float16,2047,0.024868799746036528
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,4,128,1,float16,fp8,2047,0.02595199942588806
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,8,128,1,float16,float16,1,0.01988479942083359
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,8,128,1,float16,fp8,1,0.020951999723911284
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,8,128,1,float16,float16,3,0.019969600439071655
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,8,128,1,float16,fp8,3,0.02091040015220642
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,8,128,1,float16,float16,7,0.02014079988002777
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,8,128,1,float16,fp8,7,0.020633600652217865
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,8,128,1,float16,float16,15,0.0200095996260643
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,8,128,1,float16,fp8,15,0.020588800311088562
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,8,128,1,float16,float16,31,0.01995519995689392
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,8,128,1,float16,float16,255,0.02144960016012192
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,8,128,1,float16,fp8,31,0.020798400044441223
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,8,128,1,float16,float16,63,0.020107200741767882
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,8,128,1,float16,fp8,63,0.02077440023422241
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,8,128,1,float16,float16,127,0.019900800287723543
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,8,128,1,float16,fp8,127,0.020820799469947814
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,8,128,1,float16,fp8,255,0.02295359969139099
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,8,128,1,float16,float16,511,0.024323199689388276
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,8,128,1,float16,fp8,511,0.026265600323677064
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,8,128,1,float16,float16,1023,0.024299199879169463
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,8,128,1,float16,fp8,1023,0.026105600595474242
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,8,128,1,float16,float16,2047,0.025065600872039795
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,96,8,128,1,float16,fp8,2047,0.026017600297927858
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,1,128,1,float16,float16,1,0.018228800594806673
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,1,128,1,float16,fp8,1,0.019011199474334717
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,1,128,1,float16,float16,3,0.01847680062055588
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,1,128,1,float16,fp8,3,0.019334399700164796
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,1,128,1,float16,float16,7,0.01854719966650009
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,1,128,1,float16,fp8,7,0.019124799966812135
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,1,128,1,float16,float16,15,0.01844639927148819
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,1,128,1,float16,fp8,15,0.019190399348735808
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,1,128,1,float16,float16,31,0.01839199960231781
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,1,128,1,float16,float16,63,0.018668800592422485
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,1,128,1,float16,float16,511,0.02290080040693283
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,1,128,1,float16,fp8,31,0.01929440051317215
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,1,128,1,float16,fp8,511,0.025329598784446718
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,1,128,1,float16,float16,1023,0.02340800017118454
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,1,128,1,float16,fp8,63,0.01950560063123703
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,1,128,1,float16,float16,127,0.01868640035390854
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,1,128,1,float16,fp8,127,0.01942239999771118
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,1,128,1,float16,float16,255,0.020168000459671022
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,1,128,1,float16,fp8,255,0.02126079946756363
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,1,128,1,float16,fp8,1023,0.024828800559043886
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,1,128,1,float16,float16,2047,0.023875199258327484
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,1,128,1,float16,fp8,2047,0.025251200795173644
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,2,128,1,float16,float16,1,0.020214399695396422
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,2,128,1,float16,fp8,1,0.02099200040102005
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,2,128,1,float16,float16,3,0.020073600113391876
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,2,128,1,float16,fp8,3,0.02080000042915344
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,2,128,1,float16,float16,7,0.020121599733829498
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,2,128,1,float16,fp8,7,0.020947200059890748
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,2,128,1,float16,float16,15,0.019995200634002685
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,2,128,1,float16,fp8,15,0.02099359929561615
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,2,128,1,float16,float16,31,0.020310400426387785
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,2,128,1,float16,fp8,31,0.021048000454902648
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,2,128,1,float16,float16,63,0.02006240040063858
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,2,128,1,float16,fp8,63,0.02096640020608902
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,2,128,1,float16,float16,127,0.02006240040063858
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,2,128,1,float16,fp8,127,0.020950399339199066
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,2,128,1,float16,float16,255,0.021583999693393707
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,2,128,1,float16,fp8,255,0.02306720018386841
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,2,128,1,float16,float16,511,0.02492479979991913
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,2,128,1,float16,fp8,511,0.02683840095996857
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,2,128,1,float16,float16,1023,0.024873599410057068
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,2,128,1,float16,fp8,1023,0.026158401370048524
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,2,128,1,float16,float16,2047,0.02512960135936737
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,2,128,1,float16,fp8,2047,0.026660799980163574
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,4,128,1,float16,float16,1,0.019912000000476836
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,4,128,1,float16,fp8,1,0.020870399475097657
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,4,128,1,float16,float16,3,0.02017440050840378
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,4,128,1,float16,fp8,3,0.020875200629234314
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,4,128,1,float16,float16,7,0.019968000054359437
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,4,128,1,float16,fp8,7,0.0208079993724823
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,4,128,1,float16,float16,15,0.020216000080108643
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,4,128,1,float16,fp8,15,0.020974400639533996
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,4,128,1,float16,float16,31,0.0200080007314682
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,4,128,1,float16,fp8,31,0.020822399854660036
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,4,128,1,float16,float16,63,0.020259200036525725
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,4,128,1,float16,fp8,63,0.0210207998752594
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,4,128,1,float16,float16,127,0.02019200026988983
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,4,128,1,float16,fp8,127,0.020812800526618956
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,4,128,1,float16,float16,255,0.02189279943704605
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,4,128,1,float16,fp8,255,0.023012800514698027
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,4,128,1,float16,float16,511,0.02470560073852539
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,4,128,1,float16,fp8,511,0.026468798518180847
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,4,128,1,float16,float16,1023,0.02473919987678528
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,4,128,1,float16,fp8,1023,0.026051199436187743
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,4,128,1,float16,float16,2047,0.02534080147743225
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,4,128,1,float16,fp8,2047,0.0264847993850708
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,8,128,1,float16,float16,1,0.020310400426387785
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,8,128,1,float16,fp8,1,0.021009600162506102
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,8,128,1,float16,float16,3,0.019971199333667755
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,8,128,1,float16,fp8,3,0.021115200221538545
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,8,128,1,float16,float16,7,0.02016959935426712
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,8,128,1,float16,fp8,7,0.02091519981622696
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,8,128,1,float16,float16,15,0.02025440037250519
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,8,128,1,float16,fp8,15,0.020934399962425233
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,8,128,1,float16,float16,31,0.020080000162124634
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,8,128,1,float16,fp8,31,0.020720000565052032
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,8,128,1,float16,float16,63,0.02035840004682541
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,8,128,1,float16,fp8,63,0.020942400395870208
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,8,128,1,float16,float16,127,0.02022079974412918
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,8,128,1,float16,fp8,511,0.02678399980068207
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,8,128,1,float16,fp8,127,0.021087999641895293
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,8,128,1,float16,float16,255,0.021902400255203246
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,8,128,1,float16,fp8,255,0.023257599771022798
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,8,128,1,float16,float16,511,0.024676799774169922
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,8,128,1,float16,float16,1023,0.025147199630737305
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,8,128,1,float16,fp8,1023,0.02603999972343445
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,8,128,1,float16,float16,2047,0.026761600375175477
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,96,8,128,1,float16,fp8,2047,0.027057600021362305
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,1,128,1,float16,float16,1,0.021118399500846863
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,1,128,1,float16,fp8,1,0.021990400552749634
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,1,128,1,float16,float16,3,0.021161599457263945
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,1,128,1,float16,fp8,3,0.022071999311447144
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,1,128,1,float16,float16,7,0.021289600431919097
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,1,128,1,float16,fp8,7,0.022257600724697114
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,1,128,1,float16,float16,15,0.021265600621700288
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,1,128,1,float16,fp8,15,0.022129599750041962
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,1,128,1,float16,float16,31,0.021171200275421142
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,1,128,1,float16,fp8,31,0.0221343994140625
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,1,128,1,float16,float16,63,0.021187199652194975
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,1,128,1,float16,fp8,63,0.022137600183486938
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,1,128,1,float16,float16,127,0.021804800629615782
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,1,128,1,float16,fp8,127,0.02221439927816391
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,1,128,1,float16,float16,255,0.023238399624824525
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,1,128,1,float16,fp8,255,0.02417919933795929
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,1,128,1,float16,float16,511,0.02597759962081909
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,1,128,1,float16,fp8,511,0.027695998549461365
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,1,128,1,float16,float16,1023,0.02927039861679077
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,1,128,1,float16,fp8,1023,0.029311999678611755
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,1,128,1,float16,float16,2047,0.03633440136909485
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,1,128,1,float16,fp8,2047,0.03300960063934326
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,2,128,1,float16,float16,1,0.021356800198554994
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,2,128,1,float16,fp8,1,0.022129599750041962
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,2,128,1,float16,float16,3,0.02123199999332428
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,2,128,1,float16,fp8,3,0.022217600047588347
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,2,128,1,float16,float16,7,0.02131199985742569
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,2,128,1,float16,fp8,7,0.022043199837207796
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,2,128,1,float16,float16,15,0.021348799765110015
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,2,128,1,float16,fp8,15,0.022316800057888032
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,2,128,1,float16,float16,31,0.02133760005235672
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,2,128,1,float16,fp8,31,0.022243200242519377
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,2,128,1,float16,float16,63,0.021167999505996703
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,2,128,1,float16,fp8,63,0.022332799434661866
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,2,128,1,float16,float16,127,0.02162880003452301
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,2,128,1,float16,fp8,127,0.02234400063753128
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,2,128,1,float16,float16,255,0.02316800057888031
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,2,128,1,float16,fp8,255,0.024294400215148927
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,2,128,1,float16,float16,511,0.02661440074443817
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,2,128,1,float16,fp8,511,0.02770400047302246
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,2,128,1,float16,float16,1023,0.03115679919719696
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,2,128,1,float16,fp8,1023,0.031939199566841124
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,2,128,1,float16,float16,2047,0.05261440277099609
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,2,128,1,float16,fp8,2047,0.04428159892559051
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,4,128,1,float16,float16,1,0.021631999313831328
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,4,128,1,float16,fp8,1,0.022431999444961548
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,4,128,1,float16,float16,3,0.02154559940099716
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,4,128,1,float16,fp8,3,0.022491200268268584
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,4,128,1,float16,float16,7,0.02144960016012192
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,4,128,1,float16,fp8,7,0.022486400604248048
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,4,128,1,float16,float16,15,0.021503999829292297
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,4,128,1,float16,fp8,15,0.022462399303913118
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,4,128,1,float16,float16,31,0.02152640074491501
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,4,128,1,float16,fp8,255,0.024403199553489685
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,4,128,1,float16,fp8,31,0.02253119945526123
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,4,128,1,float16,float16,63,0.021668800711631776
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,4,128,1,float16,fp8,63,0.02234559953212738
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,4,128,1,float16,float16,127,0.021678400039672852
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,4,128,1,float16,fp8,127,0.022735999524593355
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,4,128,1,float16,float16,255,0.023577600717544556
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,4,128,1,float16,float16,511,0.029105600714683533
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,4,128,1,float16,fp8,511,0.028288000822067262
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,4,128,1,float16,float16,1023,0.049534401297569274
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,4,128,1,float16,fp8,1023,0.040092799067497256
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,4,128,1,float16,float16,2047,0.07252159714698792
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,8,128,1,float16,float16,1,0.025574401021003723
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,4,128,1,float16,fp8,2047,0.056732797622680665
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,8,128,1,float16,fp8,1,0.027025601267814635
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,8,128,1,float16,float16,3,0.02540639936923981
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,8,128,1,float16,fp8,3,0.026840001344680786
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,8,128,1,float16,float16,7,0.025308799743652344
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,8,128,1,float16,fp8,7,0.026977598667144775
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,8,128,1,float16,float16,15,0.025284799933433532
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,8,128,1,float16,fp8,15,0.02693760097026825
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,8,128,1,float16,float16,31,0.02526240050792694
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,8,128,1,float16,fp8,31,0.026881599426269533
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,8,128,1,float16,float16,63,0.025446400046348572
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,8,128,1,float16,fp8,63,0.026840001344680786
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,8,128,1,float16,float16,127,0.0257999986410141
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,8,128,1,float16,fp8,127,0.02683199942111969
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,8,128,1,float16,float16,255,0.03012320101261139
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,8,128,1,float16,fp8,255,0.030913600325584413
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,8,128,1,float16,float16,511,0.0489760011434555
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,8,128,1,float16,fp8,511,0.04218400120735168
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,8,128,1,float16,float16,1023,0.07429760098457336
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,8,128,1,float16,fp8,1023,0.06116960048675537
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,8,128,1,float16,float16,2047,0.11728639602661133
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,96,8,128,1,float16,fp8,2047,0.08621119856834411
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,1,128,1,float16,float16,1,0.02003040015697479
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,1,128,1,float16,fp8,1,0.02093600034713745
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,1,128,1,float16,float16,3,0.020263999700546265
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,1,128,1,float16,fp8,3,0.021164800226688384
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,1,128,1,float16,float16,7,0.020275199413299562
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,1,128,1,float16,fp8,7,0.02123199999332428
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,1,128,1,float16,float16,15,0.02027200013399124
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,1,128,1,float16,fp8,15,0.020854400098323823
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,1,128,1,float16,float16,31,0.02024320065975189
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,1,128,1,float16,fp8,31,0.02099359929561615
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,1,128,1,float16,float16,63,0.020377600193023683
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,1,128,1,float16,fp8,63,0.02114560008049011
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,1,128,1,float16,float16,127,0.020360000431537628
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,1,128,1,float16,fp8,127,0.02112479954957962
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,1,128,1,float16,float16,255,0.02184640020132065
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,1,128,1,float16,fp8,255,0.023089599609375
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,1,128,1,float16,float16,511,0.024886399507522583
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,1,128,1,float16,fp8,511,0.027140799164772033
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,1,128,1,float16,float16,1023,0.025387200713157653
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,1,128,1,float16,fp8,1023,0.0263808012008667
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,1,128,1,float16,float16,2047,0.025649601221084596
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,2,128,1,float16,fp8,3,0.02115360051393509
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,1,128,1,float16,fp8,2047,0.026902401447296144
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,2,128,1,float16,float16,1,0.020068800449371337
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,2,128,1,float16,fp8,1,0.02091519981622696
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,2,128,1,float16,float16,3,0.020263999700546265
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,2,128,1,float16,float16,7,0.02011999934911728
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,2,128,1,float16,fp8,7,0.021155199408531188
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,2,128,1,float16,float16,15,0.02027200013399124
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,2,128,1,float16,fp8,15,0.02088640034198761
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,2,128,1,float16,float16,31,0.02022880017757416
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,2,128,1,float16,fp8,31,0.02099999934434891
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,2,128,1,float16,float16,63,0.020201599597930907
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,2,128,1,float16,fp8,63,0.020948800444602966
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,2,128,1,float16,float16,127,0.02021760046482086
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,2,128,1,float16,fp8,127,0.021264000236988066
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,2,128,1,float16,float16,255,0.0219200000166893
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,2,128,1,float16,fp8,255,0.023049600422382355
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,2,128,1,float16,float16,511,0.02486239969730377
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,2,128,1,float16,fp8,511,0.026526400446891786
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,2,128,1,float16,float16,1023,0.025011199712753295
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,2,128,1,float16,fp8,1023,0.026001599431037904
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,2,128,1,float16,float16,2047,0.02539519965648651
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,2,128,1,float16,fp8,2047,0.026625600457191468
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,4,128,1,float16,float16,1,0.02024320065975189
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,4,128,1,float16,fp8,1,0.021188800036907197
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,4,128,1,float16,float16,3,0.020177599787712098
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,4,128,1,float16,fp8,31,0.021009600162506102
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,4,128,1,float16,fp8,3,0.021035200357437132
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,4,128,1,float16,float16,7,0.020211200416088104
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,4,128,1,float16,fp8,7,0.02099519968032837
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,4,128,1,float16,float16,15,0.020596800744533537
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,4,128,1,float16,fp8,15,0.021003200113773345
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,4,128,1,float16,float16,31,0.020212799310684204
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,4,128,1,float16,float16,63,0.02021760046482086
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,4,128,1,float16,fp8,63,0.021092799305915833
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,4,128,1,float16,float16,127,0.020100800693035124
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,4,128,1,float16,fp8,127,0.021191999316215515
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,4,128,1,float16,float16,255,0.022235199809074402
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,4,128,1,float16,fp8,255,0.023024000227451324
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,4,128,1,float16,float16,511,0.025190401077270507
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,4,128,1,float16,fp8,511,0.026726400852203368
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,4,128,1,float16,float16,1023,0.025195199251174926
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,4,128,1,float16,fp8,1023,0.026092800498008727
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,4,128,1,float16,float16,2047,0.026742398738861084
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,4,128,1,float16,fp8,2047,0.026976001262664796
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,8,128,1,float16,float16,1,0.020454399287700653
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,8,128,1,float16,fp8,1,0.021191999316215515
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,8,128,1,float16,float16,3,0.02036159932613373
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,8,128,1,float16,fp8,3,0.021243199706077576
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,8,128,1,float16,float16,7,0.02045920044183731
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,8,128,1,float16,fp8,7,0.020947200059890748
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,8,128,1,float16,float16,15,0.020465600490570068
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,8,128,1,float16,fp8,15,0.021408000588417055
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,8,128,1,float16,float16,31,0.02051360011100769
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,8,128,1,float16,fp8,31,0.021227200329303742
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,8,128,1,float16,float16,63,0.020449599623680113
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,8,128,1,float16,fp8,63,0.021352000534534454
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,8,128,1,float16,float16,127,0.020476800203323365
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,8,128,1,float16,fp8,127,0.02130880057811737
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,8,128,1,float16,float16,255,0.022268800437450408
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,8,128,1,float16,fp8,255,0.02362080067396164
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,8,128,1,float16,float16,511,0.02510400116443634
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,8,128,1,float16,fp8,511,0.027008000016212463
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,8,128,1,float16,float16,1023,0.02658880054950714
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,8,128,1,float16,fp8,1023,0.026974400877952574
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,8,128,1,float16,float16,2047,0.028655999898910524
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,96,8,128,1,float16,fp8,2047,0.030427199602127076
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,1,128,1,float16,float16,1,0.024241599440574645
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,1,128,1,float16,fp8,1,0.025147199630737305
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,1,128,1,float16,float16,3,0.023715199530124666
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,1,128,1,float16,fp8,3,0.02478239983320236
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,1,128,1,float16,float16,7,0.023580799996852874
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,1,128,1,float16,fp8,7,0.02473919987678528
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,1,128,1,float16,float16,15,0.023420800268650056
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,1,128,1,float16,fp8,15,0.02508159875869751
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,1,128,1,float16,float16,31,0.02449440062046051
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,1,128,1,float16,fp8,31,0.02468799948692322
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,1,128,1,float16,float16,63,0.02370239943265915
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,1,128,1,float16,fp8,63,0.024675199389457704
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,1,128,1,float16,float16,127,0.024697600305080412
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,1,128,1,float16,fp8,127,0.024585600197315215
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,1,128,1,float16,float16,255,0.02720640003681183
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,1,128,1,float16,fp8,255,0.026892799139022826
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,1,128,1,float16,float16,511,0.03258239924907684
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,1,128,1,float16,fp8,511,0.03034079968929291
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,1,128,1,float16,float16,1023,0.05407840013504028
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,1,128,1,float16,fp8,1023,0.04580320119857788
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,2,128,1,float16,float16,1,0.02707040011882782
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,2,128,1,float16,fp8,1,0.029297599196434022
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,2,128,1,float16,float16,3,0.027569600939750673
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,2,128,1,float16,fp8,3,0.028999999165534973
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,2,128,1,float16,float16,7,0.02744159996509552
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,2,128,1,float16,fp8,7,0.02911520004272461
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,2,128,1,float16,float16,15,0.02735840082168579
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,2,128,1,float16,fp8,15,0.028958401083946227
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,2,128,1,float16,float16,31,0.02765600085258484
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,2,128,1,float16,fp8,31,0.02948319911956787
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,2,128,1,float16,float16,63,0.027444800734519957
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,2,128,1,float16,fp8,255,0.03358719944953918
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,2,128,1,float16,fp8,63,0.029246398806571962
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,2,128,1,float16,float16,127,0.027614399790763855
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,2,128,1,float16,fp8,127,0.02919999957084656
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,2,128,1,float16,float16,255,0.03595840036869049
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,2,128,1,float16,float16,511,0.05332319736480713
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,2,128,1,float16,fp8,511,0.04500640034675598
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,2,128,1,float16,float16,1023,0.08094879984855652
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,2,128,1,float16,fp8,1023,0.06564159989356995
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,4,128,1,float16,float16,1,0.035097599029541016
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,4,128,1,float16,fp8,1,0.03824320137500763
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,4,128,1,float16,float16,3,0.03485440015792847
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,4,128,1,float16,fp8,3,0.038288000226020816
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,4,128,1,float16,float16,7,0.03499360084533691
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,4,128,1,float16,fp8,7,0.0385919988155365
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,4,128,1,float16,float16,15,0.03513120114803314
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,4,128,1,float16,fp8,15,0.0383679986000061
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,4,128,1,float16,float16,31,0.03509120047092438
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,4,128,1,float16,fp8,31,0.03789600133895874
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,4,128,1,float16,float16,63,0.035339200496673585
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,4,128,1,float16,fp8,63,0.03815360069274902
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,4,128,1,float16,float16,127,0.03982880115509033
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,4,128,1,float16,fp8,127,0.03880960047245026
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,4,128,1,float16,float16,255,0.058481597900390626
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,4,128,1,float16,fp8,255,0.05208479762077332
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,4,128,1,float16,float16,511,0.08011199831962586
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,4,128,1,float16,fp8,511,0.06842399835586548
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,4,128,1,float16,float16,1023,0.12704160213470458
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,4,128,1,float16,fp8,1023,0.0990831971168518
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,8,128,1,float16,float16,1,0.049219200015068056
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,8,128,1,float16,fp8,1,0.05607519745826721
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,8,128,1,float16,float16,3,0.04973599910736084
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,8,128,1,float16,fp8,3,0.05596320033073425
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,8,128,1,float16,float16,7,0.04914720058441162
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,8,128,1,float16,fp8,7,0.05547680258750916
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,8,128,1,float16,float16,15,0.04969600141048432
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,8,128,1,float16,fp8,15,0.05591999888420105
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,8,128,1,float16,float16,31,0.04952319860458374
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,8,128,1,float16,fp8,31,0.05613440275192261
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,8,128,1,float16,float16,63,0.05597599744796753
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,8,128,1,float16,fp8,63,0.0561568021774292
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,8,128,1,float16,float16,127,0.0639136016368866
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,8,128,1,float16,fp8,127,0.0619488000869751
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,8,128,1,float16,float16,255,0.08658080101013184
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,8,128,1,float16,fp8,255,0.08190879821777344
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,8,128,1,float16,float16,511,0.12844159603118896
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,8,128,1,float16,fp8,511,0.11034879684448243
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,8,128,1,float16,float16,1023,0.21872799396514891
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,1,128,1,float16,float16,1,0.03091680109500885
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,96,8,128,1,float16,fp8,1023,0.16363680362701416
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,1,128,1,float16,fp8,1,0.0325872004032135
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,1,128,1,float16,float16,3,0.03094879984855652
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,1,128,1,float16,fp8,3,0.032625600695610046
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,1,128,1,float16,float16,7,0.031222400069236756
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,1,128,1,float16,fp8,7,0.03275200128555298
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,1,128,1,float16,fp8,15,0.03288640081882477
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,1,128,1,float16,float16,15,0.030924800038337707
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,1,128,1,float16,float16,31,0.030817601084709167
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,1,128,1,float16,fp8,31,0.03288959860801697
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,1,128,1,float16,float16,63,0.03094240128993988
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,1,128,1,float16,fp8,63,0.03270399868488312
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,1,128,1,float16,float16,127,0.03317599892616272
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,1,128,1,float16,fp8,127,0.032529601454734804
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,1,128,1,float16,float16,255,0.04640159904956818
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,1,128,1,float16,fp8,255,0.037118399143218996
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,2,128,1,float16,float16,1,0.038068801164627075
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,2,128,1,float16,fp8,1,0.04154399931430817
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,2,128,1,float16,float16,3,0.037868800759315493
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,2,128,1,float16,fp8,3,0.04141440093517303
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,2,128,1,float16,float16,7,0.037959998846054076
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,2,128,1,float16,fp8,7,0.04156160056591034
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,2,128,1,float16,float16,15,0.03814719915390015
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,2,128,1,float16,fp8,15,0.041596800088882446
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,2,128,1,float16,float16,31,0.037920001149177554
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,2,128,1,float16,fp8,31,0.04107199907302857
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,2,128,1,float16,float16,63,0.03855839967727661
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,2,128,1,float16,fp8,255,0.0568336009979248
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,2,128,1,float16,fp8,63,0.04126720130443573
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,2,128,1,float16,float16,127,0.04962719976902008
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,2,128,1,float16,fp8,127,0.04184640049934387
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,2,128,1,float16,float16,255,0.06265439987182617
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,4,128,1,float16,float16,1,0.05247039794921875
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,4,128,1,float16,fp8,1,0.05900480151176453
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,4,128,1,float16,float16,3,0.05257120132446289
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,4,128,1,float16,fp8,3,0.05869280099868775
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,4,128,1,float16,float16,7,0.05257279872894287
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,4,128,1,float16,fp8,7,0.05936639904975891
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,4,128,1,float16,float16,15,0.05283520221710205
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,4,128,1,float16,fp8,15,0.058740800619125365
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,4,128,1,float16,float16,31,0.05244960188865662
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,4,128,1,float16,fp8,31,0.05895199775695801
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,4,128,1,float16,float16,63,0.060420799255371097
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,4,128,1,float16,fp8,63,0.05922560095787048
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,4,128,1,float16,float16,127,0.06659039855003357
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,4,128,1,float16,fp8,127,0.06966879963874817
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,4,128,1,float16,float16,255,0.08929920196533203
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,4,128,1,float16,fp8,255,0.08517439961433411
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,8,128,1,float16,float16,1,0.08061280250549316
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,8,128,1,float16,fp8,1,0.09403200149536133
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,8,128,1,float16,float16,3,0.08106080293655396
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,8,128,1,float16,fp8,3,0.09497600197792053
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,8,128,1,float16,float16,7,0.08125439882278443
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,8,128,1,float16,fp8,7,0.09457759857177735
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,8,128,1,float16,float16,15,0.08126239776611328
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,8,128,1,float16,fp8,15,0.09392480254173279
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,8,128,1,float16,float16,31,0.08724799752235413
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,8,128,1,float16,fp8,127,0.11232320070266724
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,8,128,1,float16,fp8,31,0.09466080069541931
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,8,128,1,float16,float16,63,0.09280319809913636
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,8,128,1,float16,fp8,63,0.10393439531326294
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,8,128,1,float16,float16,127,0.10003999471664429
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,8,128,1,float16,float16,255,0.14716639518737792
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,96,8,128,1,float16,fp8,255,0.1359439969062805
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,1,128,1,float16,float16,1,0.04586080014705658
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,1,128,1,float16,float16,3,0.045771199464797976
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,1,128,1,float16,fp8,1,0.04900799989700318
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,1,128,1,float16,fp8,3,0.04896480143070221
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,1,128,1,float16,float16,7,0.04651040136814118
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,1,128,1,float16,fp8,7,0.048665601015090945
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,1,128,1,float16,float16,15,0.04659520089626312
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,1,128,1,float16,fp8,15,0.04905920028686524
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,1,128,1,float16,float16,31,0.047707200050354004
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,1,128,1,float16,fp8,31,0.04893600046634674
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,1,128,1,float16,float16,63,0.05387200117111206
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,1,128,1,float16,fp8,63,0.050380802154541014
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,1,128,1,float16,float16,127,0.059248000383377075
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,1,128,1,float16,fp8,127,0.05316799879074097
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,2,128,1,float16,float16,1,0.05905119776725769
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,2,128,1,float16,fp8,1,0.06561279892921448
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,2,128,1,float16,float16,3,0.060380798578262326
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,2,128,1,float16,fp8,3,0.06572160124778748
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,2,128,1,float16,float16,7,0.060433602333068846
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,2,128,1,float16,fp8,7,0.06622239947319031
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,2,128,1,float16,float16,15,0.0608784019947052
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,2,128,1,float16,fp8,15,0.06587039828300476
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,2,128,1,float16,float16,31,0.06239200234413147
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,2,128,1,float16,fp8,31,0.06595519781112671
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,2,128,1,float16,float16,63,0.07059199810028076
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,4,128,1,float16,float16,3,0.087772798538208
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,2,128,1,float16,fp8,63,0.07041280269622803
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,2,128,1,float16,float16,127,0.07658560276031494
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,2,128,1,float16,fp8,127,0.07777919769287109
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,4,128,1,float16,float16,1,0.08923839926719665
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,4,128,1,float16,fp8,1,0.10067520141601563
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,4,128,1,float16,fp8,3,0.10027040243148803
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,4,128,1,float16,float16,7,0.08906239867210389
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,4,128,1,float16,float16,63,0.1005519986152649
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,4,128,1,float16,fp8,7,0.10067039728164673
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,4,128,1,float16,float16,15,0.09096159934997558
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,4,128,1,float16,fp8,15,0.10157279968261719
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,4,128,1,float16,float16,31,0.09845600128173829
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,8,128,1,float16,fp8,1,0.1694815993309021
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,4,128,1,float16,fp8,31,0.10476640462875367
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,4,128,1,float16,fp8,63,0.11237920522689819
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,4,128,1,float16,float16,127,0.10807199478149414
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,4,128,1,float16,fp8,127,0.11799360513687134
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,8,128,1,float16,float16,1,0.1434272050857544
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,8,128,1,float16,float16,3,0.14536960124969484
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,8,128,1,float16,fp8,3,0.1701151967048645
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,8,128,1,float16,float16,7,0.145196795463562
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,8,128,1,float16,fp8,7,0.16918879747390747
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,8,128,1,float16,float16,15,0.1531615972518921
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,8,128,1,float16,fp8,15,0.17097120285034179
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,8,128,1,float16,float16,31,0.15737440586090087
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,8,128,1,float16,fp8,31,0.18079520463943483
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,8,128,1,float16,float16,63,0.15998079776763915
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,8,128,1,float16,fp8,63,0.18801759481430053
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,8,128,1,float16,float16,127,0.17475680112838746
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,1,128,1,float16,float16,1,0.0205487996339798
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,96,8,128,1,float16,fp8,127,0.20152480602264405
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,1,128,1,float16,fp8,1,0.021324799954891206
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,1,128,1,float16,float16,3,0.020292800664901734
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,1,128,1,float16,fp8,3,0.02114879935979843
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,1,128,1,float16,float16,7,0.02027360051870346
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,1,128,1,float16,fp8,7,0.021169599890708924
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,1,128,1,float16,float16,15,0.020692799985408784
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,1,128,1,float16,fp8,15,0.021414400637149812
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,1,128,1,float16,float16,31,0.020452800393104553
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,1,128,1,float16,fp8,31,0.021083199977874757
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,1,128,1,float16,float16,63,0.0203232005238533
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,1,128,1,float16,fp8,63,0.02110559940338135
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,1,128,1,float16,float16,127,0.020652799308300017
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,1,128,1,float16,fp8,127,0.021457600593566894
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,1,128,1,float16,float16,255,0.022265599668025972
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,1,128,1,float16,fp8,255,0.023318399488925935
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,1,128,1,float16,float16,511,0.02498079985380173
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,1,128,1,float16,fp8,511,0.02672800123691559
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,1,128,1,float16,float16,1023,0.025366398692131042
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,1,128,1,float16,fp8,1023,0.026675200462341307
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,1,128,1,float16,float16,2047,0.026868799328804018
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,1,128,1,float16,fp8,2047,0.027504000067710876
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,2,128,1,float16,float16,1,0.020396800339221956
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,2,128,1,float16,fp8,1,0.020950399339199066
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,2,128,1,float16,float16,3,0.020374399423599244
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,2,128,1,float16,fp8,3,0.02130240052938461
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,2,128,1,float16,float16,7,0.020425599813461304
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,2,128,1,float16,fp8,7,0.021401600539684297
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,2,128,1,float16,float16,15,0.02060000002384186
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,2,128,1,float16,fp8,15,0.02134079933166504
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,2,128,1,float16,float16,31,0.020316800475120543
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,2,128,1,float16,fp8,31,0.02104640007019043
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,2,128,1,float16,float16,63,0.020547200739383698
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,2,128,1,float16,fp8,63,0.021166400611400606
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,2,128,1,float16,float16,127,0.020612800121307374
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,2,128,1,float16,fp8,127,0.02125599980354309
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,2,128,1,float16,float16,255,0.022303999960422517
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,2,128,1,float16,fp8,255,0.02290560007095337
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,2,128,1,float16,float16,511,0.02499680072069168
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,2,128,1,float16,fp8,511,0.02698720097541809
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,2,128,1,float16,float16,1023,0.025224000215530396
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,2,128,1,float16,fp8,1023,0.027008000016212463
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,2,128,1,float16,float16,2047,0.02720479965209961
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,2,128,1,float16,fp8,2047,0.027153599262237548
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,4,128,1,float16,float16,1,0.02067520022392273
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,4,128,1,float16,fp8,1,0.021542400121688843
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,4,128,1,float16,float16,3,0.020520000159740447
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,4,128,1,float16,fp8,3,0.02146400064229965
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,4,128,1,float16,float16,7,0.020660799741744996
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,4,128,1,float16,fp8,7,0.02152000069618225
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,4,128,1,float16,float16,15,0.020572799444198608
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,4,128,1,float16,fp8,15,0.02150239944458008
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,4,128,1,float16,float16,31,0.020703999698162077
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,4,128,1,float16,fp8,31,0.02152799963951111
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,4,128,1,float16,float16,63,0.02051360011100769
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,4,128,1,float16,fp8,63,0.02152000069618225
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,4,128,1,float16,float16,127,0.020720000565052032
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,4,128,1,float16,fp8,127,0.021964800357818604
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,4,128,1,float16,float16,255,0.022603200376033784
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,4,128,1,float16,fp8,255,0.02351839989423752
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,4,128,1,float16,float16,511,0.025249600410461426
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,4,128,1,float16,fp8,511,0.027020800113677978
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,4,128,1,float16,float16,1023,0.026723200082778932
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,4,128,1,float16,fp8,1023,0.02709600031375885
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,4,128,1,float16,float16,2047,0.02964319884777069
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,4,128,1,float16,fp8,2047,0.030907198786735535
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,8,128,1,float16,float16,1,0.020771199464797975
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,8,128,1,float16,fp8,1,0.021430400013923646
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,8,128,1,float16,float16,3,0.02064319998025894
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,8,128,1,float16,fp8,3,0.02149440050125122
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,8,128,1,float16,float16,7,0.020684799551963805
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,8,128,1,float16,fp8,7,0.021745599806308746
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,8,128,1,float16,float16,15,0.020707200467586517
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,8,128,1,float16,fp8,15,0.02165919989347458
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,8,128,1,float16,float16,31,0.020785599946975708
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,8,128,1,float16,fp8,31,0.021588799357414246
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,8,128,1,float16,float16,63,0.02066880017518997
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,8,128,1,float16,fp8,63,0.0213359996676445
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,8,128,1,float16,float16,127,0.020891200006008147
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,8,128,1,float16,fp8,127,0.021614399552345277
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,8,128,1,float16,float16,255,0.022332799434661866
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,8,128,1,float16,fp8,255,0.023809599876403808
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,8,128,1,float16,float16,511,0.025761601328849793
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,8,128,1,float16,fp8,511,0.02715519964694977
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,8,128,1,float16,float16,1023,0.02915999889373779
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,8,128,1,float16,fp8,1023,0.030262398719787597
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,8,128,1,float16,float16,2047,0.049779200553894044
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,96,8,128,1,float16,fp8,2047,0.040214401483535764
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,1,128,1,float16,float16,1,0.07956799864768982
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,1,128,1,float16,fp8,1,0.08201760053634644
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,1,128,1,float16,fp8,15,0.08263999819755555
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,1,128,1,float16,float16,3,0.07913920283317566
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,1,128,1,float16,fp8,3,0.08160160183906555
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,1,128,1,float16,float16,7,0.07878879904747009
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,1,128,1,float16,fp8,7,0.08093439936637878
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,1,128,1,float16,float16,15,0.0794975996017456
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,1,128,1,float16,float16,31,0.0814415991306305
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,1,128,1,float16,fp8,31,0.08339840173721313
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,1,128,1,float16,float16,63,0.08429759740829468
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,1,128,1,float16,fp8,63,0.08604320287704467
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,2,128,1,float16,float16,1,0.10629440546035766
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,2,128,1,float16,fp8,1,0.11433440446853638
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,2,128,1,float16,float16,3,0.10627039670944213
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,2,128,1,float16,fp8,3,0.11594719886779785
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,2,128,1,float16,float16,7,0.10627679824829102
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,2,128,1,float16,fp8,31,0.11976319551467896
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,2,128,1,float16,fp8,7,0.11697599887847901
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,2,128,1,float16,float16,15,0.1072208046913147
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,2,128,1,float16,fp8,15,0.118014395236969
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,2,128,1,float16,float16,31,0.10793440341949463
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,2,128,1,float16,float16,63,0.11182080507278443
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,2,128,1,float16,fp8,63,0.12184959650039673
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,4,128,1,float16,float16,1,0.1634592056274414
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,4,128,1,float16,fp8,1,0.18880480527877808
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,4,128,1,float16,float16,3,0.16283680200576783
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,4,128,1,float16,fp8,3,0.1874400019645691
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,4,128,1,float16,float16,7,0.1627279996871948
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,4,128,1,float16,fp8,7,0.18767839670181274
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,4,128,1,float16,float16,15,0.16368319988250732
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,4,128,1,float16,fp8,15,0.18893760442733765
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,4,128,1,float16,float16,31,0.1659983992576599
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,8,128,1,float16,float16,1,0.2856575965881348
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,4,128,1,float16,fp8,31,0.19131360054016114
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,4,128,1,float16,float16,63,0.1692271947860718
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,4,128,1,float16,fp8,63,0.1942528009414673
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,8,128,1,float16,fp8,1,0.3378400087356567
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,8,128,1,float16,float16,3,0.2854543924331665
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,8,128,1,float16,fp8,3,0.3392240047454834
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,8,128,1,float16,float16,7,0.2860368013381958
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,8,128,1,float16,fp8,7,0.3390480041503906
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,8,128,1,float16,float16,15,0.2865567922592163
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,8,128,1,float16,fp8,15,0.33565919399261473
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,8,128,1,float16,float16,31,0.2908207893371582
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,8,128,1,float16,float16,63,0.2917151927947998
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,8,128,1,float16,fp8,31,0.3402656078338623
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,1,128,1,float16,float16,1,0.12722879648208618
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,1,128,1,float16,fp8,1,0.13703680038452148
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,96,8,128,1,float16,fp8,63,0.34066081047058105
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,1,128,1,float16,float16,3,0.1271199941635132
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,1,128,1,float16,fp8,3,0.1360592007637024
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,1,128,1,float16,float16,7,0.12735680341720582
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,1,128,1,float16,fp8,7,0.13630399703979493
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,1,128,1,float16,float16,15,0.12842880487442015
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,1,128,1,float16,fp8,15,0.1380511999130249
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,1,128,1,float16,float16,31,0.1302623987197876
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,1,128,1,float16,fp8,31,0.13798400163650512
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,2,128,1,float16,float16,1,0.18059680461883545
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,2,128,1,float16,fp8,1,0.20228800773620606
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,2,128,1,float16,float16,3,0.1785632014274597
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,2,128,1,float16,fp8,3,0.20189599990844725
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,2,128,1,float16,float16,7,0.17974079847335817
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,2,128,1,float16,fp8,7,0.20339200496673585
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,2,128,1,float16,float16,15,0.18071039915084838
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,2,128,1,float16,fp8,15,0.20362720489501954
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,2,128,1,float16,float16,31,0.1841231942176819
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,2,128,1,float16,fp8,31,0.2056096076965332
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,4,128,1,float16,float16,1,0.29436640739440917
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,4,128,1,float16,fp8,1,0.34486238956451415
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,4,128,1,float16,float16,3,0.296451210975647
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,4,128,1,float16,fp8,3,0.3473184108734131
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,4,128,1,float16,float16,7,0.2959183931350708
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,4,128,1,float16,fp8,7,0.34522879123687744
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,4,128,1,float16,float16,15,0.29743359088897703
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,4,128,1,float16,fp8,15,0.34691998958587644
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,4,128,1,float16,float16,31,0.29896318912506104
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,4,128,1,float16,fp8,31,0.34938559532165525
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,8,128,1,float16,float16,1,0.5358640193939209
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,8,128,1,float16,fp8,3,0.6323008060455322
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,8,128,1,float16,fp8,1,0.6361248016357421
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,8,128,1,float16,float16,3,0.5342927932739258
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,8,128,1,float16,float16,7,0.534830379486084
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,8,128,1,float16,fp8,7,0.6322639942169189
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,8,128,1,float16,float16,15,0.5338511943817139
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,1,128,1,float16,float16,1,0.020576000213623047
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,8,128,1,float16,fp8,15,0.6333136081695556
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,1,128,1,float16,fp8,1,0.021367999911308288
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,8,128,1,float16,float16,31,0.5408656120300293
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,1,128,1,float16,float16,3,0.020662400126457214
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,96,8,128,1,float16,fp8,31,0.6331967830657959
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,1,128,1,float16,fp8,3,0.021193599700927733
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,1,128,1,float16,float16,7,0.02065120041370392
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,1,128,1,float16,fp8,7,0.02101760059595108
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,1,128,1,float16,float16,15,0.020532800257205962
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,1,128,1,float16,fp8,15,0.021300800144672394
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,1,128,1,float16,float16,31,0.020577600598335265
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,1,128,1,float16,fp8,31,0.021404799818992615
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,1,128,1,float16,float16,63,0.020553599298000335
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,1,128,1,float16,fp8,63,0.021166400611400606
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,1,128,1,float16,fp8,511,0.027054399251937866
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,1,128,1,float16,float16,127,0.02083519995212555
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,1,128,1,float16,fp8,127,0.021243199706077576
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,1,128,1,float16,float16,255,0.02215999960899353
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,1,128,1,float16,fp8,255,0.023345600068569183
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,1,128,1,float16,float16,511,0.025115200877189638
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,1,128,1,float16,float16,1023,0.02627840042114258
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,1,128,1,float16,fp8,1023,0.027107200026512145
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,1,128,1,float16,float16,2047,0.028259199857711793
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,1,128,1,float16,fp8,2047,0.028863999247550964
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,2,128,1,float16,float16,1,0.020691199600696562
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,2,128,1,float16,fp8,1,0.02099999934434891
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,2,128,1,float16,float16,3,0.020558400452136992
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,2,128,1,float16,fp8,3,0.0211776003241539
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,2,128,1,float16,float16,7,0.020340800285339355
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,2,128,1,float16,fp8,7,0.02144159972667694
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,2,128,1,float16,float16,15,0.020848000049591066
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,2,128,1,float16,fp8,15,0.02131360024213791
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,2,128,1,float16,float16,31,0.020479999482631683
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,2,128,1,float16,fp8,31,0.021712000668048858
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,2,128,1,float16,float16,63,0.02051199972629547
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,2,128,1,float16,fp8,63,0.021115200221538545
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,2,128,1,float16,float16,127,0.0208064004778862
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,2,128,1,float16,fp8,127,0.021849599480628968
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,2,128,1,float16,float16,255,0.022404800355434417
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,2,128,1,float16,fp8,255,0.023414400219917298
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,2,128,1,float16,float16,511,0.02574400007724762
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,2,128,1,float16,fp8,511,0.026846399903297423
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,2,128,1,float16,float16,1023,0.027422401309013366
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,2,128,1,float16,fp8,1023,0.027454400062561037
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,2,128,1,float16,float16,2047,0.03115839958190918
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,2,128,1,float16,fp8,2047,0.03150720000267029
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,4,128,1,float16,float16,1,0.020652799308300017
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,4,128,1,float16,fp8,1,0.021622399985790252
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,4,128,1,float16,float16,3,0.020614400506019592
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,4,128,1,float16,fp8,3,0.02165440022945404
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,4,128,1,float16,float16,7,0.02091200053691864
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,4,128,1,float16,fp8,7,0.021849599480628968
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,4,128,1,float16,float16,15,0.020822399854660036
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,4,128,1,float16,fp8,15,0.021777600049972534
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,4,128,1,float16,float16,31,0.020895999670028687
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,4,128,1,float16,fp8,31,0.02136960029602051
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,4,128,1,float16,float16,63,0.020707200467586517
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,4,128,1,float16,fp8,63,0.021963199973106383
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,4,128,1,float16,float16,127,0.02091040015220642
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,4,128,1,float16,fp8,127,0.02208160012960434
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,4,128,1,float16,float16,255,0.02271360009908676
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,4,128,1,float16,fp8,255,0.023720000684261323
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,4,128,1,float16,float16,511,0.0259552001953125
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,4,128,1,float16,fp8,511,0.027132800221443175
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,4,128,1,float16,float16,1023,0.031206399202346802
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,4,128,1,float16,fp8,1023,0.03065280020236969
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,4,128,1,float16,float16,2047,0.05082560181617737
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,4,128,1,float16,fp8,2047,0.038950398564338684
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,8,128,1,float16,float16,1,0.02111999988555908
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,8,128,1,float16,fp8,1,0.021934400498867034
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,8,128,1,float16,float16,3,0.021223999559879303
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,8,128,1,float16,fp8,3,0.021840000152587892
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,8,128,1,float16,float16,7,0.020948800444602966
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,8,128,1,float16,fp8,7,0.022145600616931917
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,8,128,1,float16,float16,15,0.021187199652194975
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,8,128,1,float16,fp8,15,0.021934400498867034
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,8,128,1,float16,float16,31,0.02098720073699951
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,8,128,1,float16,fp8,31,0.021766400337219237
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,8,128,1,float16,float16,63,0.021096000075340272
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,8,128,1,float16,fp8,63,0.021984000504016877
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,8,128,1,float16,float16,127,0.021134400367736818
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,8,128,1,float16,fp8,127,0.022360000014305114
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,8,128,1,float16,float16,255,0.023038400709629057
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,8,128,1,float16,fp8,255,0.024027200043201448
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,8,128,1,float16,float16,511,0.02802880108356476
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,8,128,1,float16,fp8,511,0.02757120132446289
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,8,128,1,float16,float16,1023,0.04836640059947968
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,8,128,1,float16,fp8,1023,0.037723198533058167
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,8,128,1,float16,float16,2047,0.07062720060348511
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,96,8,128,1,float16,fp8,2047,0.05578719973564148
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,1,128,1,float16,float16,1,0.021982400119304656
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,1,128,1,float16,fp8,1,0.022729599475860597
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,1,128,1,float16,float16,3,0.02189760059118271
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,1,128,1,float16,fp8,3,0.022711999714374542
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,1,128,1,float16,float16,7,0.02194560021162033
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,1,128,1,float16,fp8,7,0.022617599368095397
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,1,128,1,float16,float16,15,0.02162880003452301
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,1,128,1,float16,fp8,15,0.023177599906921385
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,1,128,1,float16,float16,31,0.022043199837207796
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,1,128,1,float16,fp8,31,0.02288320064544678
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,1,128,1,float16,float16,63,0.021887999773025513
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,1,128,1,float16,fp8,63,0.022779199481010436
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,1,128,1,float16,float16,127,0.022281600534915923
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,1,128,1,float16,fp8,127,0.022819200158119203
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,1,128,1,float16,float16,255,0.023731200397014617
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,1,128,1,float16,fp8,255,0.02465119957923889
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,1,128,1,float16,float16,511,0.027136000990867614
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,1,128,1,float16,fp8,511,0.028409600257873535
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,1,128,1,float16,float16,1023,0.03642399907112122
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,1,128,1,float16,fp8,1023,0.03259679973125458
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,1,128,1,float16,float16,2047,0.053939199447631835
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,1,128,1,float16,fp8,2047,0.04469279944896698
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,1,128,1,float16,float16,4095,0.07561759948730469
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,1,128,1,float16,fp8,4095,0.060703998804092406
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,2,128,1,float16,float16,1,0.022270399332046508
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,2,128,1,float16,fp8,1,0.023179200291633607
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,2,128,1,float16,float16,3,0.022228799760341644
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,2,128,1,float16,fp8,3,0.02296479940414429
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,2,128,1,float16,float16,7,0.022171199321746826
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,2,128,1,float16,fp8,7,0.023001599311828613
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,2,128,1,float16,float16,15,0.021988800168037413
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,2,128,1,float16,fp8,15,0.022916799783706664
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,2,128,1,float16,float16,31,0.022144000232219695
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,2,128,1,float16,fp8,31,0.023192000389099122
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,2,128,1,float16,float16,63,0.02211360037326813
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,2,128,1,float16,fp8,63,0.022993600368499754
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,2,128,1,float16,float16,127,0.022563199698925018
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,2,128,1,float16,fp8,127,0.02287999987602234
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,2,128,1,float16,float16,255,0.023988799750804903
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,2,128,1,float16,fp8,255,0.024977600574493407
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,2,128,1,float16,float16,511,0.030103999376296996
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,2,128,1,float16,fp8,511,0.02858240008354187
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,2,128,1,float16,float16,1023,0.05035039782524109
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,2,128,1,float16,fp8,1023,0.0423471987247467
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,2,128,1,float16,float16,2047,0.07195519804954528
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,2,128,1,float16,fp8,2047,0.0579472005367279
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,2,128,1,float16,float16,4095,0.11813600063323974
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,4,128,1,float16,float16,1,0.02587040066719055
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,2,128,1,float16,fp8,4095,0.08216639757156372
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,4,128,1,float16,fp8,1,0.027476799488067628
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,4,128,1,float16,float16,3,0.026046401262283324
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,4,128,1,float16,fp8,3,0.02739199995994568
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,4,128,1,float16,float16,7,0.025779199600219727
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,4,128,1,float16,fp8,7,0.02752319872379303
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,4,128,1,float16,float16,15,0.026025599241256712
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,4,128,1,float16,fp8,15,0.027622398734092713
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,4,128,1,float16,float16,31,0.025961598753929137
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,4,128,1,float16,fp8,31,0.027636799216270446
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,4,128,1,float16,float16,63,0.02581920027732849
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,4,128,1,float16,fp8,63,0.02741760015487671
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,4,128,1,float16,float16,127,0.025905600190162657
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,4,128,1,float16,fp8,511,0.03958080112934113
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,4,128,1,float16,fp8,127,0.027657601237297057
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,4,128,1,float16,float16,255,0.03135519921779632
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,4,128,1,float16,fp8,255,0.0314736008644104
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,4,128,1,float16,float16,511,0.05006880164146423
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,4,128,1,float16,float16,1023,0.07681120038032532
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,4,128,1,float16,fp8,1023,0.06218240261077881
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,4,128,1,float16,float16,2047,0.11934720277786255
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,4,128,1,float16,fp8,2047,0.08672959804534912
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,4,128,1,float16,float16,4095,0.204803204536438
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,8,128,1,float16,float16,1,0.033671998977661134
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,4,128,1,float16,fp8,4095,0.13711199760437012
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,8,128,1,float16,fp8,1,0.03657439947128296
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,8,128,1,float16,float16,3,0.033369600772857666
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,8,128,1,float16,fp8,3,0.036487999558448794
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,8,128,1,float16,float16,7,0.033292800188064575
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,8,128,1,float16,fp8,7,0.036471998691558837
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,8,128,1,float16,float16,15,0.033739200234413146
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,8,128,1,float16,fp8,15,0.03687199950218201
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,8,128,1,float16,float16,31,0.03341760039329529
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,8,128,1,float16,fp8,31,0.03670560121536255
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,8,128,1,float16,float16,63,0.03308480083942413
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,8,128,1,float16,fp8,63,0.03652639985084534
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,8,128,1,float16,float16,127,0.03601439893245697
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,8,128,1,float16,fp8,127,0.03678719997406006
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,8,128,1,float16,float16,255,0.05568000078201294
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,8,128,1,float16,fp8,255,0.04874080121517181
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,8,128,1,float16,float16,511,0.07628960013389588
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,8,128,1,float16,fp8,511,0.0671455979347229
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,8,128,1,float16,float16,1023,0.1231727957725525
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,8,128,1,float16,fp8,1023,0.09595839977264405
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,8,128,1,float16,float16,2047,0.2099152088165283
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,8,128,1,float16,fp8,2047,0.14460159540176393
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,1,128,1,float16,float16,1,0.017712000012397765
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,1,128,1,float16,fp8,1,0.018287999927997588
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,8,128,1,float16,float16,4095,0.3772416114807129
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,64,8,128,1,float16,fp8,4095,0.24320321083068847
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,1,128,1,float16,float16,3,0.017385600507259368
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,1,128,1,float16,fp8,3,0.018460799753665925
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,1,128,1,float16,float16,7,0.01743199974298477
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,1,128,1,float16,fp8,7,0.01863359957933426
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,1,128,1,float16,float16,15,0.01759839951992035
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,1,128,1,float16,fp8,15,0.01839679926633835
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,1,128,1,float16,float16,31,0.017508800327777862
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,1,128,1,float16,fp8,31,0.018571199476718904
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,1,128,1,float16,float16,63,0.017369599640369417
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,1,128,1,float16,fp8,63,0.018484799563884734
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,1,128,1,float16,float16,127,0.017449599504470826
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,1,128,1,float16,fp8,127,0.018614399433135986
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,1,128,1,float16,float16,255,0.01923519968986511
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,1,128,1,float16,fp8,255,0.02035519927740097
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,1,128,1,float16,float16,511,0.022302399575710296
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,1,128,1,float16,fp8,511,0.024454399943351746
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,1,128,1,float16,float16,1023,0.022275200486183165
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,1,128,1,float16,fp8,1023,0.023742400109767914
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,1,128,1,float16,float16,2047,0.022675199806690215
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,1,128,1,float16,fp8,2047,0.023614400625228883
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,1,128,1,float16,float16,4095,0.022884799540042876
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,1,128,1,float16,fp8,4095,0.024195200204849242
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,2,128,1,float16,float16,1,0.01839359998703003
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,2,128,1,float16,fp8,1,0.019223999977111817
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,2,128,1,float16,float16,3,0.018172800540924072
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,2,128,1,float16,fp8,3,0.01918720006942749
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,2,128,1,float16,float16,7,0.01826400011777878
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,2,128,1,float16,fp8,7,0.0191551998257637
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,2,128,1,float16,float16,15,0.018385599553585052
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,2,128,1,float16,fp8,15,0.019299200177192687
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,2,128,1,float16,float16,31,0.018244799971580506
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,2,128,1,float16,fp8,31,0.01923519968986511
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,2,128,1,float16,float16,63,0.018273599445819855
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,2,128,1,float16,fp8,63,0.019232000410556793
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,2,128,1,float16,float16,127,0.01838880032300949
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,2,128,1,float16,fp8,127,0.019332799315452575
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,2,128,1,float16,float16,255,0.020059199631214143
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,2,128,1,float16,fp8,255,0.02131360024213791
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,2,128,1,float16,float16,511,0.022987200319766997
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,2,128,1,float16,fp8,511,0.025060799717903138
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,2,128,1,float16,float16,1023,0.022593599557876588
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,2,128,1,float16,fp8,1023,0.024276800453662872
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,2,128,1,float16,float16,2047,0.02300640046596527
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,2,128,1,float16,fp8,2047,0.02401120066642761
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,2,128,1,float16,float16,4095,0.02388000041246414
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,2,128,1,float16,fp8,4095,0.024952000379562377
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,4,128,1,float16,float16,1,0.019806399941444397
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,4,128,1,float16,fp8,1,0.02069759964942932
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,4,128,1,float16,float16,3,0.01972000002861023
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,4,128,1,float16,fp8,3,0.02083680033683777
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,4,128,1,float16,float16,7,0.0200095996260643
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,4,128,1,float16,fp8,7,0.020916800200939178
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,4,128,1,float16,float16,15,0.019990399479866028
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,4,128,1,float16,fp8,15,0.02056799978017807
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,4,128,1,float16,float16,31,0.019886399805545806
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,4,128,1,float16,fp8,31,0.020814399421215057
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,4,128,1,float16,float16,63,0.019980800151824952
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,4,128,1,float16,fp8,63,0.020854400098323823
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,4,128,1,float16,float16,127,0.019976000487804412
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,4,128,1,float16,fp8,127,0.020761600136756896
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,4,128,1,float16,float16,255,0.021352000534534454
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,4,128,1,float16,fp8,255,0.02277279943227768
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,4,128,1,float16,float16,511,0.024500800669193266
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,4,128,1,float16,fp8,511,0.026499199867248534
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,4,128,1,float16,float16,1023,0.02449440062046051
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,4,128,1,float16,fp8,1023,0.0257968008518219
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,4,128,1,float16,float16,2047,0.02489439994096756
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,4,128,1,float16,fp8,2047,0.025924798846244813
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,4,128,1,float16,float16,4095,0.02571359872817993
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,4,128,1,float16,fp8,4095,0.026782399415969847
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,8,128,1,float16,float16,1,0.02004159986972809
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,8,128,1,float16,fp8,1,0.021063999831676485
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,8,128,1,float16,float16,3,0.020201599597930907
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,8,128,1,float16,fp8,3,0.020852799713611602
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,8,128,1,float16,float16,7,0.019920000433921815
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,8,128,1,float16,fp8,7,0.020604799687862396
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,8,128,1,float16,float16,15,0.01996160000562668
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,8,128,1,float16,fp8,15,0.021070399880409242
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,8,128,1,float16,float16,31,0.019985599815845488
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,8,128,1,float16,fp8,31,0.02101760059595108
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,8,128,1,float16,float16,63,0.020108799636363982
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,8,128,1,float16,fp8,63,0.020942400395870208
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,8,128,1,float16,float16,127,0.020024000108242034
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,8,128,1,float16,fp8,127,0.020499199628829956
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,8,128,1,float16,float16,255,0.021555200219154358
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,8,128,1,float16,fp8,255,0.023112000524997713
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,8,128,1,float16,float16,511,0.024616000056266785
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,8,128,1,float16,fp8,511,0.02656480073928833
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,8,128,1,float16,float16,1023,0.024264000356197357
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,8,128,1,float16,fp8,1023,0.02569440007209778
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,8,128,1,float16,float16,2047,0.024956800043582916
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,8,128,1,float16,fp8,2047,0.026156800985336303
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,8,128,1,float16,float16,4095,0.026748800277709962
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,64,8,128,1,float16,fp8,4095,0.02727999985218048
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,1,128,1,float16,float16,1,0.018345600366592406
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,1,128,1,float16,fp8,1,0.019252799451351166
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,1,128,1,float16,float16,3,0.018464000523090364
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,1,128,1,float16,fp8,3,0.01926079988479614
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,1,128,1,float16,float16,7,0.018488000333309173
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,1,128,1,float16,fp8,7,0.018980799615383147
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,1,128,1,float16,float16,15,0.018321600556373597
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,1,128,1,float16,fp8,15,0.019177600741386414
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,1,128,1,float16,float16,31,0.01852640062570572
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,1,128,1,float16,fp8,31,0.01932000070810318
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,1,128,1,float16,float16,63,0.018441599607467652
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,1,128,1,float16,fp8,63,0.019382399320602418
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,1,128,1,float16,float16,127,0.018883199989795686
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,1,128,1,float16,fp8,127,0.018913599848747253
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,1,128,1,float16,float16,255,0.019968000054359437
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,1,128,1,float16,fp8,255,0.02122880071401596
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,1,128,1,float16,float16,511,0.02314399927854538
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,1,128,1,float16,fp8,511,0.0250575989484787
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,1,128,1,float16,float16,1023,0.023124800622463228
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,1,128,1,float16,fp8,1023,0.02484800070524216
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,1,128,1,float16,float16,2047,0.023585599660873414
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,2,128,1,float16,fp8,3,0.020924800634384157
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,1,128,1,float16,fp8,2047,0.02468799948692322
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,1,128,1,float16,float16,4095,0.0239424005150795
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,1,128,1,float16,fp8,4095,0.02521919906139374
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,2,128,1,float16,float16,1,0.020057600736618043
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,2,128,1,float16,fp8,1,0.020982399582862854
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,2,128,1,float16,float16,3,0.02019840031862259
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,2,128,1,float16,float16,7,0.020017600059509276
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,2,128,1,float16,fp8,7,0.02075680047273636
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,2,128,1,float16,float16,15,0.019812799990177155
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,2,128,1,float16,fp8,15,0.020844799280166627
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,2,128,1,float16,float16,31,0.020044800639152528
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,2,128,1,float16,fp8,31,0.0210207998752594
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,2,128,1,float16,float16,63,0.02029920071363449
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,2,128,1,float16,fp8,63,0.021033599972724915
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,2,128,1,float16,float16,127,0.020084799826145174
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,2,128,1,float16,fp8,127,0.020902399718761445
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,2,128,1,float16,float16,255,0.021524800360202788
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,2,128,1,float16,fp8,255,0.023049600422382355
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,2,128,1,float16,float16,511,0.024583999812602998
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,2,128,1,float16,fp8,2047,0.025996801257133485
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,2,128,1,float16,fp8,511,0.026791998744010927
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,2,128,1,float16,float16,1023,0.02467840015888214
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,2,128,1,float16,fp8,1023,0.025872001051902772
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,2,128,1,float16,float16,2047,0.025051200389862062
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,2,128,1,float16,float16,4095,0.026054400205612182
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,2,128,1,float16,fp8,4095,0.026791998744010927
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,4,128,1,float16,fp8,1,0.02096640020608902
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,4,128,1,float16,float16,1,0.020313599705696107
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,4,128,1,float16,float16,3,0.020204800367355346
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,4,128,1,float16,fp8,3,0.02094080001115799
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,4,128,1,float16,float16,7,0.020121599733829498
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,4,128,1,float16,fp8,7,0.0208624005317688
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,4,128,1,float16,float16,15,0.02001120001077652
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,4,128,1,float16,fp8,15,0.02086080014705658
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,4,128,1,float16,float16,31,0.020399999618530274
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,4,128,1,float16,fp8,31,0.021084800362586975
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,4,128,1,float16,float16,63,0.02024320065975189
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,4,128,1,float16,fp8,63,0.020828799903392793
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,4,128,1,float16,float16,127,0.02011840045452118
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,4,128,1,float16,fp8,127,0.020640000700950623
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,4,128,1,float16,float16,255,0.021721599996089934
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,4,128,1,float16,fp8,255,0.02324160039424896
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,4,128,1,float16,float16,511,0.025054401159286498
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,4,128,1,float16,fp8,511,0.026528000831604004
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,4,128,1,float16,float16,1023,0.024635200202465058
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,4,128,1,float16,fp8,1023,0.02584159970283508
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,4,128,1,float16,float16,2047,0.025225600600242613
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,4,128,1,float16,fp8,2047,0.02608479857444763
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,4,128,1,float16,float16,4095,0.027233600616455078
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,4,128,1,float16,fp8,4095,0.02775680124759674
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,8,128,1,float16,float16,1,0.020340800285339355
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,8,128,1,float16,fp8,1,0.020857599377632142
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,8,128,1,float16,float16,3,0.020193600654602052
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,8,128,1,float16,fp8,3,0.020769600570201874
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,8,128,1,float16,float16,7,0.01989919990301132
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,8,128,1,float16,fp8,7,0.021080000698566435
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,8,128,1,float16,float16,15,0.020547200739383698
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,8,128,1,float16,fp8,15,0.021139200031757354
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,8,128,1,float16,float16,31,0.020190399885177613
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,8,128,1,float16,fp8,31,0.020948800444602966
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,8,128,1,float16,float16,63,0.020235200226306916
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,8,128,1,float16,fp8,63,0.02110240012407303
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,8,128,1,float16,float16,127,0.020262399315834047
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,8,128,1,float16,fp8,127,0.02133280038833618
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,8,128,1,float16,fp8,1023,0.026080000400543212
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,8,128,1,float16,float16,255,0.022040000557899474
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,8,128,1,float16,float16,2047,0.026398399472236635
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,8,128,1,float16,fp8,255,0.023104000091552734
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,8,128,1,float16,float16,511,0.024758400022983552
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,8,128,1,float16,fp8,511,0.026598399877548216
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,8,128,1,float16,float16,1023,0.024852800369262695
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,8,128,1,float16,fp8,2047,0.027134400606155396
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,8,128,1,float16,float16,4095,0.03017919957637787
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,1,128,1,float16,float16,1,0.021031999588012697
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,64,8,128,1,float16,fp8,4095,0.03107840120792389
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,1,128,1,float16,fp8,1,0.02178879976272583
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,1,128,1,float16,float16,3,0.021080000698566435
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,1,128,1,float16,fp8,3,0.021886399388313292
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,1,128,1,float16,float16,7,0.02096160054206848
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,1,128,1,float16,fp8,7,0.022225600481033326
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,1,128,1,float16,float16,15,0.021209600567817687
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,1,128,1,float16,fp8,15,0.022232000529766083
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,1,128,1,float16,float16,31,0.021247999370098115
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,1,128,1,float16,fp8,31,0.021937599778175353
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,1,128,1,float16,float16,63,0.020974400639533996
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,1,128,1,float16,fp8,63,0.021835200488567352
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,1,128,1,float16,float16,127,0.02138720005750656
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,1,128,1,float16,fp8,127,0.02232320010662079
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,1,128,1,float16,float16,255,0.023292799293994904
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,1,128,1,float16,fp8,255,0.02404959946870804
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,1,128,1,float16,float16,511,0.026214399933815004
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,1,128,1,float16,fp8,511,0.027588799595832825
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,1,128,1,float16,float16,1023,0.028641599416732787
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,1,128,1,float16,fp8,1023,0.028288000822067262
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,1,128,1,float16,float16,2047,0.03129279911518097
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,1,128,1,float16,fp8,2047,0.03205440044403076
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,1,128,1,float16,float16,4095,0.05278720259666443
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,1,128,1,float16,fp8,4095,0.042735999822616576
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,2,128,1,float16,float16,1,0.021294400095939636
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,2,128,1,float16,fp8,1,0.022019200026988983
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,2,128,1,float16,float16,3,0.021116800606250763
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,2,128,1,float16,fp8,3,0.022089600563049316
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,2,128,1,float16,float16,7,0.020975999534130096
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,2,128,1,float16,fp8,7,0.022070400416851044
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,2,128,1,float16,float16,15,0.021547199785709382
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,2,128,1,float16,fp8,15,0.0221328005194664
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,2,128,1,float16,float16,31,0.02138399928808212
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,2,128,1,float16,fp8,31,0.021966400742530822
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,2,128,1,float16,float16,63,0.02130720019340515
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,2,128,1,float16,fp8,63,0.02202879935503006
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,2,128,1,float16,float16,127,0.021135999262332915
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,2,128,1,float16,fp8,127,0.022443200647830962
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,2,128,1,float16,float16,255,0.023289600014686586
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,2,128,1,float16,fp8,255,0.024456000328063963
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,2,128,1,float16,float16,511,0.026257601380348206
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,2,128,1,float16,fp8,511,0.027584001421928406
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,2,128,1,float16,float16,1023,0.03187359869480133
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,2,128,1,float16,fp8,1023,0.031118398904800414
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,2,128,1,float16,float16,2047,0.051583999395370485
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,2,128,1,float16,fp8,2047,0.0397487998008728
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,2,128,1,float16,float16,4095,0.07346240282058716
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,2,128,1,float16,fp8,4095,0.058385598659515384
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,4,128,1,float16,float16,1,0.021499200165271758
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,4,128,1,float16,fp8,1,0.022225600481033326
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,4,128,1,float16,float16,3,0.02143999934196472
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,4,128,1,float16,fp8,3,0.02212799936532974
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,4,128,1,float16,float16,7,0.0216048002243042
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,4,128,1,float16,fp8,7,0.022622400522232057
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,4,128,1,float16,float16,15,0.02160640060901642
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,4,128,1,float16,fp8,15,0.02242079973220825
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,4,128,1,float16,float16,31,0.021404799818992615
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,4,128,1,float16,fp8,31,0.02263360023498535
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,4,128,1,float16,float16,63,0.021299199759960176
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,4,128,1,float16,fp8,63,0.02250880002975464
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,4,128,1,float16,float16,127,0.022140799462795256
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,4,128,1,float16,fp8,511,0.028060799837112425
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,4,128,1,float16,fp8,127,0.02264000028371811
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,4,128,1,float16,float16,255,0.023503999412059783
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,4,128,1,float16,float16,2047,0.07108799815177917
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,4,128,1,float16,fp8,255,0.024225600063800812
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,4,128,1,float16,float16,511,0.027635198831558228
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,4,128,1,float16,float16,1023,0.04899680018424988
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,4,128,1,float16,fp8,1023,0.04004479944705963
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,8,128,1,float16,fp8,1,0.026675200462341307
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,4,128,1,float16,fp8,2047,0.056871998310089114
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,8,128,1,float16,float16,1,0.025286400318145753
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,4,128,1,float16,float16,4095,0.11473759412765502
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,4,128,1,float16,fp8,4095,0.08120480179786682
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,8,128,1,float16,float16,3,0.02529120147228241
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,8,128,1,float16,fp8,3,0.02704800069332123
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,8,128,1,float16,float16,7,0.025235199928283693
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,8,128,1,float16,fp8,7,0.02693440020084381
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,8,128,1,float16,float16,15,0.02529920041561127
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,8,128,1,float16,fp8,15,0.027127999067306518
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,8,128,1,float16,float16,31,0.025123199820518492
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,8,128,1,float16,fp8,31,0.026761600375175477
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,8,128,1,float16,float16,63,0.025678399205207824
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,8,128,1,float16,fp8,63,0.02691200077533722
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,8,128,1,float16,float16,127,0.02579520046710968
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,8,128,1,float16,fp8,127,0.026763200759887695
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,8,128,1,float16,float16,255,0.031737598776817325
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,8,128,1,float16,fp8,1023,0.06106399893760681
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,8,128,1,float16,fp8,255,0.0305759996175766
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,8,128,1,float16,float16,511,0.04882239997386932
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,8,128,1,float16,fp8,511,0.04232159852981567
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,8,128,1,float16,float16,1023,0.07499840259552001
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,8,128,1,float16,float16,2047,0.11695359945297241
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,8,128,1,float16,fp8,2047,0.08632959723472595
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,8,128,1,float16,float16,4095,0.20128319263458253
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,1,128,1,float16,float16,1,0.0200655996799469
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,64,8,128,1,float16,fp8,4095,0.1357408046722412
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,1,128,1,float16,fp8,1,0.021247999370098115
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,1,128,1,float16,float16,3,0.020216000080108643
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,1,128,1,float16,fp8,3,0.020883199572563172
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,1,128,1,float16,float16,7,0.02014559954404831
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,1,128,1,float16,fp8,63,0.021059200167655945
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,1,128,1,float16,fp8,7,0.021169599890708924
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,1,128,1,float16,float16,15,0.02038719952106476
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,1,128,1,float16,fp8,15,0.020942400395870208
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,1,128,1,float16,float16,31,0.02001439929008484
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,1,128,1,float16,fp8,31,0.021190400421619415
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,1,128,1,float16,float16,63,0.020252799987792967
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,1,128,1,float16,float16,127,0.020259200036525725
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,1,128,1,float16,fp8,127,0.020942400395870208
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,1,128,1,float16,float16,255,0.02189760059118271
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,1,128,1,float16,fp8,255,0.023048000037670137
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,1,128,1,float16,fp8,2047,0.026491200923919676
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,1,128,1,float16,float16,511,0.02470880001783371
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,1,128,1,float16,fp8,511,0.026321598887443544
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,1,128,1,float16,float16,1023,0.025118398666381835
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,1,128,1,float16,fp8,1023,0.026153600215911864
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,1,128,1,float16,float16,2047,0.025249600410461426
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,1,128,1,float16,float16,4095,0.02637600004673004
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,1,128,1,float16,fp8,4095,0.026836800575256347
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,2,128,1,float16,float16,1,0.020151999592781068
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,2,128,1,float16,fp8,1,0.02112320065498352
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,2,128,1,float16,float16,3,0.020024000108242034
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,2,128,1,float16,fp8,3,0.020977599918842314
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,2,128,1,float16,float16,7,0.020046399533748628
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,2,128,1,float16,fp8,7,0.02083040028810501
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,2,128,1,float16,float16,15,0.020265600085258482
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,2,128,1,float16,fp8,15,0.020777599513530733
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,2,128,1,float16,float16,31,0.02007199972867966
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,2,128,1,float16,fp8,31,0.02123039960861206
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,2,128,1,float16,float16,63,0.020027199387550355
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,2,128,1,float16,fp8,63,0.020924800634384157
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,2,128,1,float16,float16,127,0.020211200416088104
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,2,128,1,float16,fp8,127,0.020854400098323823
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,2,128,1,float16,float16,255,0.02189600020647049
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,2,128,1,float16,fp8,255,0.022935999929904936
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,2,128,1,float16,float16,511,0.024691200256347655
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,2,128,1,float16,fp8,511,0.027241599559783936
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,2,128,1,float16,float16,1023,0.0244719997048378
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,2,128,1,float16,fp8,1023,0.02619200050830841
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,2,128,1,float16,float16,2047,0.025135999917984007
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,2,128,1,float16,fp8,2047,0.02651520073413849
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,2,128,1,float16,float16,4095,0.027527999877929688
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,2,128,1,float16,fp8,4095,0.027377599477767946
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,4,128,1,float16,float16,1,0.020153599977493285
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,4,128,1,float16,fp8,1,0.021206399798393248
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,4,128,1,float16,float16,3,0.020017600059509276
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,4,128,1,float16,fp8,3,0.020919999480247496
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,4,128,1,float16,float16,7,0.020502400398254395
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,4,128,1,float16,fp8,7,0.02155359983444214
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,4,128,1,float16,float16,15,0.020244799554347992
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,4,128,1,float16,fp8,15,0.02093600034713745
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,4,128,1,float16,float16,31,0.02038400024175644
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,4,128,1,float16,fp8,31,0.02086720019578934
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,4,128,1,float16,float16,63,0.020059199631214143
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,4,128,1,float16,fp8,63,0.021121600270271303
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,4,128,1,float16,float16,127,0.020465600490570068
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,4,128,1,float16,float16,1023,0.02515999972820282
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,4,128,1,float16,fp8,127,0.021118399500846863
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,4,128,1,float16,float16,255,0.021932800114154816
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,4,128,1,float16,fp8,255,0.023313599824905395
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,4,128,1,float16,float16,511,0.024854399263858795
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,4,128,1,float16,fp8,511,0.02683520019054413
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,4,128,1,float16,fp8,1023,0.02646079957485199
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,4,128,1,float16,float16,2047,0.02682879865169525
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,4,128,1,float16,fp8,2047,0.026859200000762938
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,4,128,1,float16,float16,4095,0.03056640028953552
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,4,128,1,float16,fp8,4095,0.030956798791885377
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,8,128,1,float16,float16,1,0.020326399803161622
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,8,128,1,float16,fp8,1,0.021164800226688384
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,8,128,1,float16,float16,3,0.02070080041885376
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,8,128,1,float16,fp8,3,0.02119999974966049
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,8,128,1,float16,float16,7,0.02038560062646866
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,8,128,1,float16,fp8,7,0.021382400393486024
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,8,128,1,float16,float16,15,0.020692799985408784
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,8,128,1,float16,fp8,15,0.02141280025243759
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,8,128,1,float16,float16,31,0.02041279971599579
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,8,128,1,float16,fp8,31,0.021385599672794343
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,8,128,1,float16,float16,63,0.020553599298000335
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,8,128,1,float16,fp8,63,0.02141920030117035
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,8,128,1,float16,float16,127,0.020436799526214598
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,8,128,1,float16,fp8,127,0.02144480049610138
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,8,128,1,float16,float16,255,0.02221119999885559
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,8,128,1,float16,fp8,255,0.023387199640274046
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,8,128,1,float16,float16,511,0.025516799092292784
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,8,128,1,float16,fp8,2047,0.030272001028060914
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,8,128,1,float16,fp8,511,0.026995199918746948
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,8,128,1,float16,float16,1023,0.026521599292755126
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,8,128,1,float16,fp8,1023,0.026743999123573302
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,1,128,1,float16,fp8,3,0.024566400051116943
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,8,128,1,float16,float16,2047,0.031043198704719544
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,8,128,1,float16,float16,4095,0.04952319860458374
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,64,8,128,1,float16,fp8,4095,0.04023039937019348
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,1,128,1,float16,float16,1,0.023339200019836425
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,1,128,1,float16,fp8,1,0.024851199984550477
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,1,128,1,float16,float16,3,0.02359039932489395
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,1,128,1,float16,float16,7,0.023515200614929198
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,1,128,1,float16,fp8,7,0.024398399889469145
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,1,128,1,float16,float16,15,0.023763200640678404
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,1,128,1,float16,fp8,15,0.025038400292396547
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,1,128,1,float16,float16,31,0.023769600689411162
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,1,128,1,float16,fp8,31,0.024355199933052064
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,1,128,1,float16,float16,63,0.023849600553512575
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,1,128,1,float16,fp8,63,0.024430400133132933
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,1,128,1,float16,float16,127,0.024073599278926848
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,1,128,1,float16,fp8,127,0.024383999407291412
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,1,128,1,float16,float16,255,0.026529601216316222
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,1,128,1,float16,fp8,255,0.02717599868774414
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,1,128,1,float16,float16,511,0.032123199105262755
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,1,128,1,float16,fp8,511,0.029844799637794496
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,1,128,1,float16,float16,1023,0.05321919918060303
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,1,128,1,float16,fp8,1023,0.04457440078258514
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,1,128,1,float16,float16,2047,0.07467679977416992
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,1,128,1,float16,fp8,2047,0.05897120237350464
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,2,128,1,float16,float16,1,0.02741599977016449
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,2,128,1,float16,fp8,1,0.02942720055580139
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,2,128,1,float16,fp8,15,0.028835201263427736
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,2,128,1,float16,float16,3,0.027724799513816834
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,2,128,1,float16,fp8,3,0.028777599334716797
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,2,128,1,float16,float16,7,0.02757279872894287
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,2,128,1,float16,fp8,7,0.028833600878715514
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,2,128,1,float16,float16,15,0.02735840082168579
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,2,128,1,float16,float16,31,0.02747200131416321
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,2,128,1,float16,fp8,31,0.02940320074558258
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,2,128,1,float16,float16,63,0.02730720043182373
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,2,128,1,float16,fp8,63,0.02918879985809326
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,2,128,1,float16,float16,127,0.027112001180648805
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,2,128,1,float16,fp8,127,0.02940959930419922
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,2,128,1,float16,float16,255,0.03496159911155701
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,2,128,1,float16,fp8,255,0.03296799957752228
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,2,128,1,float16,float16,511,0.051832002401351926
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,2,128,1,float16,fp8,511,0.044791999459266665
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,2,128,1,float16,float16,1023,0.07916799783706666
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,2,128,1,float16,fp8,1023,0.06530399918556214
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,2,128,1,float16,float16,2047,0.12169599533081055
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,2,128,1,float16,fp8,2047,0.08952640295028687
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,4,128,1,float16,float16,1,0.03510079979896545
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,4,128,1,float16,fp8,1,0.038166400790214536
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,4,128,1,float16,float16,3,0.0350847989320755
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,4,128,1,float16,fp8,3,0.03861919939517975
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,4,128,1,float16,float16,7,0.03488959968090057
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,4,128,1,float16,fp8,7,0.03828479945659637
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,4,128,1,float16,float16,15,0.03494560122489929
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,4,128,1,float16,fp8,15,0.0384656012058258
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,4,128,1,float16,float16,31,0.03516960144042969
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,4,128,1,float16,fp8,31,0.03811199963092804
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,4,128,1,float16,float16,63,0.03532159924507141
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,4,128,1,float16,fp8,63,0.038648000359535216
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,4,128,1,float16,float16,127,0.04004800021648407
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,4,128,1,float16,fp8,127,0.03795520067214966
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,4,128,1,float16,float16,255,0.05713760256767273
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,4,128,1,float16,fp8,255,0.0503711998462677
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,4,128,1,float16,float16,511,0.07909280061721802
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,4,128,1,float16,fp8,511,0.06801279783248901
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,4,128,1,float16,float16,1023,0.12569119930267333
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,4,128,1,float16,fp8,1023,0.09745439887046814
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,4,128,1,float16,float16,2047,0.21109120845794677
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,8,128,1,float16,float16,1,0.04929920136928558
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,4,128,1,float16,fp8,2047,0.14729119539260865
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,8,128,1,float16,fp8,1,0.0554639995098114
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,8,128,1,float16,float16,3,0.049409601092338565
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,8,128,1,float16,fp8,3,0.05612800121307373
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,8,128,1,float16,float16,7,0.04913600087165833
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,8,128,1,float16,fp8,7,0.05554559826850891
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,8,128,1,float16,float16,15,0.04922559857368469
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,8,128,1,float16,fp8,15,0.056062400341033936
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,8,128,1,float16,float16,31,0.04927999973297119
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,8,128,1,float16,fp8,31,0.05576639771461487
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,8,128,1,float16,float16,63,0.055211198329925534
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,8,128,1,float16,fp8,63,0.055726397037506106
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,8,128,1,float16,float16,511,0.12516319751739502
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,8,128,1,float16,float16,127,0.06275039911270142
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,8,128,1,float16,fp8,127,0.06075360178947449
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,8,128,1,float16,float16,255,0.08635519742965699
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,8,128,1,float16,fp8,255,0.08069760203361512
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,8,128,1,float16,fp8,511,0.11020159721374512
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,8,128,1,float16,float16,1023,0.2187743902206421
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,8,128,1,float16,fp8,1023,0.1627776026725769
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,1,128,1,float16,float16,1,0.030564799904823303
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,1,128,1,float16,fp8,3,0.03234240114688873
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,1,128,1,float16,fp8,1,0.0325984001159668
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,8,128,1,float16,float16,2047,0.38774240016937256
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,64,8,128,1,float16,fp8,2047,0.25907680988311765
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,1,128,1,float16,float16,3,0.031041601300239564
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,1,128,1,float16,float16,7,0.030243200063705445
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,1,128,1,float16,fp8,31,0.03224000036716461
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,1,128,1,float16,fp8,7,0.03278079926967621
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,1,128,1,float16,float16,15,0.030321601033210754
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,1,128,1,float16,fp8,15,0.03253119885921478
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,1,128,1,float16,float16,31,0.030456000566482545
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,1,128,1,float16,float16,63,0.030788800120353697
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,1,128,1,float16,fp8,63,0.03224639892578125
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,1,128,1,float16,float16,511,0.056145602464675905
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,1,128,1,float16,float16,127,0.031159999966621398
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,1,128,1,float16,fp8,127,0.03271200060844422
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,1,128,1,float16,float16,255,0.04145120084285736
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,1,128,1,float16,fp8,255,0.03654240071773529
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,1,128,1,float16,fp8,511,0.049491199851036075
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,2,128,1,float16,float16,1,0.03755199909210205
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,2,128,1,float16,fp8,1,0.04105600118637085
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,2,128,1,float16,float16,3,0.037982401251792905
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,2,128,1,float16,float16,7,0.03788639903068543
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,2,128,1,float16,fp8,3,0.0412880003452301
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,2,128,1,float16,fp8,7,0.04121600091457367
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,2,128,1,float16,float16,15,0.038068801164627075
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,2,128,1,float16,fp8,15,0.04160000085830688
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,2,128,1,float16,float16,31,0.037599998712539676
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,2,128,1,float16,fp8,31,0.04119519889354706
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,2,128,1,float16,float16,63,0.03829759955406189
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,2,128,1,float16,fp8,63,0.04163999855518341
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,2,128,1,float16,float16,127,0.044628798961639404
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,2,128,1,float16,fp8,127,0.04157919883728027
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,2,128,1,float16,float16,255,0.06037759780883789
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,2,128,1,float16,fp8,255,0.05527999997138977
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,2,128,1,float16,float16,511,0.08475840091705322
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,2,128,1,float16,fp8,511,0.07191519737243653
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,4,128,1,float16,float16,1,0.052665597200393675
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,4,128,1,float16,fp8,1,0.0586624026298523
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,4,128,1,float16,float16,3,0.053041601181030275
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,4,128,1,float16,fp8,3,0.05881440043449402
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,4,128,1,float16,float16,7,0.05251359939575195
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,4,128,1,float16,fp8,7,0.058955198526382445
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,4,128,1,float16,float16,15,0.05242400169372559
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,4,128,1,float16,fp8,15,0.058606398105621335
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,4,128,1,float16,float16,31,0.05249279737472534
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,4,128,1,float16,float16,255,0.0875216007232666
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,4,128,1,float16,fp8,31,0.05878559947013855
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,4,128,1,float16,float16,63,0.059252798557281494
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,4,128,1,float16,fp8,63,0.05906879901885986
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,4,128,1,float16,float16,127,0.06534559726715088
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,4,128,1,float16,fp8,127,0.06575359702110291
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,4,128,1,float16,fp8,255,0.08384640216827392
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,4,128,1,float16,float16,511,0.13215359449386596
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,4,128,1,float16,fp8,511,0.11269439458847046
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,8,128,1,float16,float16,1,0.0807263970375061
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,8,128,1,float16,fp8,1,0.09512799978256226
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,8,128,1,float16,float16,3,0.08089119791984559
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,8,128,1,float16,fp8,3,0.09429919719696045
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,8,128,1,float16,float16,7,0.08153759837150573
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,8,128,1,float16,fp8,7,0.09371839761734009
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,8,128,1,float16,float16,15,0.08101599812507629
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,8,128,1,float16,fp8,15,0.09390400052070617
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,8,128,1,float16,float16,31,0.0819920003414154
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,8,128,1,float16,float16,255,0.1434064030647278
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,8,128,1,float16,fp8,31,0.09520320296287536
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,8,128,1,float16,float16,63,0.09056320190429687
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,8,128,1,float16,fp8,63,0.10153440237045289
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,8,128,1,float16,float16,127,0.09653440117835999
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,8,128,1,float16,fp8,127,0.11094239950180054
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,8,128,1,float16,fp8,255,0.13560800552368163
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,8,128,1,float16,float16,511,0.22936480045318602
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,1,128,1,float16,float16,1,0.045049598813056944
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,64,8,128,1,float16,fp8,511,0.19621599912643434
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,1,128,1,float16,fp8,1,0.048163199424743654
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,1,128,1,float16,float16,3,0.045337599515914914
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,1,128,1,float16,fp8,3,0.04800800085067749
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,1,128,1,float16,float16,7,0.045161598920822145
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,1,128,1,float16,fp8,7,0.04855040013790131
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,1,128,1,float16,float16,15,0.044998401403427125
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,1,128,1,float16,fp8,15,0.04851999878883362
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,1,128,1,float16,float16,31,0.04505600035190582
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,1,128,1,float16,fp8,31,0.04866879880428314
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,1,128,1,float16,float16,63,0.048369601368904114
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,1,128,1,float16,fp8,63,0.04871520102024078
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,1,128,1,float16,float16,127,0.05681759715080261
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,1,128,1,float16,fp8,127,0.048835200071334836
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,1,128,1,float16,float16,255,0.06899039745330811
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,1,128,1,float16,fp8,255,0.06451519727706909
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,2,128,1,float16,float16,1,0.059443199634552
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,2,128,1,float16,fp8,1,0.0648032009601593
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,2,128,1,float16,float16,3,0.059062397480010985
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,2,128,1,float16,fp8,3,0.06534240245819092
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,2,128,1,float16,float16,7,0.05915840268135071
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,2,128,1,float16,fp8,7,0.0657423973083496
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,2,128,1,float16,float16,15,0.05910720229148865
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,2,128,1,float16,fp8,15,0.06584640145301819
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,2,128,1,float16,float16,31,0.0607807993888855
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,2,128,1,float16,fp8,31,0.06527680158615112
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,2,128,1,float16,float16,63,0.06822559833526612
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,2,128,1,float16,fp8,63,0.06693599820137024
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,2,128,1,float16,float16,127,0.07523679733276367
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,4,128,1,float16,fp8,1,0.100382399559021
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,2,128,1,float16,fp8,127,0.07521759867668151
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,2,128,1,float16,float16,255,0.09663040041923524
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,2,128,1,float16,fp8,255,0.08990880250930786
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,4,128,1,float16,float16,1,0.08726720213890075
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,4,128,1,float16,float16,3,0.0872816026210785
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,4,128,1,float16,fp8,3,0.10117440223693848
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,4,128,1,float16,float16,7,0.08735679984092712
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,4,128,1,float16,fp8,31,0.10106559991836547
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,4,128,1,float16,fp8,7,0.1002992033958435
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,4,128,1,float16,float16,15,0.08783040046691895
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,4,128,1,float16,fp8,15,0.09994239807128906
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,4,128,1,float16,float16,31,0.09335520267486572
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,4,128,1,float16,float16,63,0.10025759935379028
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,4,128,1,float16,fp8,63,0.1098031997680664
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,4,128,1,float16,float16,127,0.1054800033569336
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,4,128,1,float16,fp8,127,0.11544480323791503
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,4,128,1,float16,float16,255,0.14467519521713257
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,4,128,1,float16,fp8,255,0.14120639562606813
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,8,128,1,float16,float16,1,0.14096319675445557
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,8,128,1,float16,fp8,1,0.16883679628372192
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,8,128,1,float16,float16,3,0.142958402633667
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,8,128,1,float16,fp8,3,0.16976319551467894
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,8,128,1,float16,float16,7,0.1426975965499878
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,8,128,1,float16,fp8,7,0.1690816044807434
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,8,128,1,float16,float16,15,0.1453376054763794
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,8,128,1,float16,fp8,15,0.16835999488830566
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,8,128,1,float16,float16,31,0.15495680570602416
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,8,128,1,float16,fp8,31,0.17598079442977904
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,8,128,1,float16,float16,63,0.15864800214767455
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,8,128,1,float16,fp8,63,0.18413280248641967
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,8,128,1,float16,float16,127,0.1665984034538269
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,8,128,1,float16,fp8,127,0.19975999593734742
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,8,128,1,float16,float16,255,0.24871358871459961
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,1,128,1,float16,float16,1,0.02061759978532791
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,64,8,128,1,float16,fp8,255,0.24042561054229736
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,1,128,1,float16,fp8,1,0.02109760046005249
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,1,128,1,float16,float16,3,0.02029920071363449
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,1,128,1,float16,fp8,3,0.021323199570178985
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,1,128,1,float16,float16,7,0.020326399803161622
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,1,128,1,float16,fp8,7,0.021073600649833678
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,1,128,1,float16,float16,15,0.020556800067424774
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,1,128,1,float16,fp8,15,0.021476800739765167
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,1,128,1,float16,float16,255,0.022043199837207796
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,1,128,1,float16,float16,31,0.020262399315834047
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,1,128,1,float16,fp8,31,0.021054400503635405
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,1,128,1,float16,float16,63,0.020411199331283568
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,1,128,1,float16,fp8,63,0.021328000724315642
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,1,128,1,float16,float16,127,0.02027360051870346
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,1,128,1,float16,fp8,127,0.021193599700927733
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,1,128,1,float16,fp8,255,0.023531199991703035
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,1,128,1,float16,float16,511,0.025028800964355467
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,1,128,1,float16,fp8,511,0.026764801144599913
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,1,128,1,float16,float16,1023,0.025089600682258607
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,1,128,1,float16,fp8,1023,0.026617598533630372
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,1,128,1,float16,float16,2047,0.02608320116996765
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,1,128,1,float16,fp8,2047,0.026590400934219362
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,1,128,1,float16,float16,4095,0.02771199941635132
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,1,128,1,float16,fp8,4095,0.02807680070400238
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,2,128,1,float16,float16,1,0.02025440037250519
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,2,128,1,float16,fp8,1,0.020948800444602966
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,2,128,1,float16,float16,3,0.020423999428749083
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,2,128,1,float16,fp8,3,0.021272000670433045
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,2,128,1,float16,fp8,31,0.021163199841976166
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,2,128,1,float16,float16,7,0.0205375999212265
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,2,128,1,float16,fp8,7,0.02134079933166504
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,2,128,1,float16,float16,15,0.020395199954509734
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,2,128,1,float16,fp8,15,0.021396799385547637
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,2,128,1,float16,float16,31,0.02036159932613373
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,2,128,1,float16,float16,63,0.020555199682712556
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,2,128,1,float16,fp8,63,0.02110559940338135
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,2,128,1,float16,float16,127,0.020644800364971162
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,2,128,1,float16,fp8,127,0.021352000534534454
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,2,128,1,float16,float16,255,0.022060799598693847
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,2,128,1,float16,fp8,255,0.023500800132751465
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,2,128,1,float16,float16,511,0.02489279955625534
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,2,128,1,float16,fp8,511,0.026870399713516235
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,2,128,1,float16,float16,1023,0.0251008003950119
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,2,128,1,float16,fp8,1023,0.02616479992866516
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,2,128,1,float16,float16,2047,0.026719999313354493
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,2,128,1,float16,fp8,2047,0.027011200785636902
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,2,128,1,float16,float16,4095,0.03202239871025085
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,2,128,1,float16,fp8,4095,0.030859199166297913
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,4,128,1,float16,float16,1,0.020545600354671477
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,4,128,1,float16,fp8,1,0.02125760018825531
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,4,128,1,float16,float16,3,0.020688000321388244
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,4,128,1,float16,fp8,3,0.02141920030117035
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,4,128,1,float16,float16,7,0.020715199410915375
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,4,128,1,float16,fp8,7,0.021352000534534454
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,4,128,1,float16,float16,15,0.020587199926376344
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,4,128,1,float16,fp8,15,0.021359999477863312
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,4,128,1,float16,float16,31,0.020577600598335265
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,4,128,1,float16,fp8,31,0.021536000072956085
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,4,128,1,float16,float16,63,0.020684799551963805
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,4,128,1,float16,fp8,63,0.021411199867725373
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,4,128,1,float16,float16,127,0.020547200739383698
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,4,128,1,float16,fp8,127,0.021190400421619415
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,4,128,1,float16,float16,255,0.022310400009155275
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,4,128,1,float16,fp8,255,0.02348800003528595
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,4,128,1,float16,float16,511,0.025193598866462708
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,4,128,1,float16,fp8,511,0.027191999554634094
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,4,128,1,float16,float16,1023,0.026601600646972656
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,4,128,1,float16,fp8,1023,0.02690559923648834
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,4,128,1,float16,float16,2047,0.03083840012550354
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,8,128,1,float16,fp8,1,0.021556800603866576
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,4,128,1,float16,fp8,2047,0.030723199248313904
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,4,128,1,float16,float16,4095,0.05012480020523071
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,4,128,1,float16,fp8,4095,0.03995679914951324
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,8,128,1,float16,float16,1,0.020848000049591066
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,8,128,1,float16,float16,3,0.020632000267505647
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,8,128,1,float16,fp8,3,0.02136480063199997
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,8,128,1,float16,float16,7,0.020720000565052032
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,8,128,1,float16,fp8,7,0.021408000588417055
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,8,128,1,float16,float16,15,0.020692799985408784
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,8,128,1,float16,fp8,15,0.021624000370502473
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,8,128,1,float16,float16,31,0.020603199303150178
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,8,128,1,float16,fp8,31,0.02171040028333664
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,8,128,1,float16,float16,63,0.020708799362182617
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,8,128,1,float16,fp8,63,0.021766400337219237
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,8,128,1,float16,float16,127,0.02094399929046631
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,8,128,1,float16,fp8,127,0.021585600078105928
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,8,128,1,float16,float16,255,0.022623999416828154
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,8,128,1,float16,fp8,255,0.023710399866104126
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,8,128,1,float16,float16,511,0.02529279887676239
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,8,128,1,float16,fp8,511,0.027422401309013366
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,8,128,1,float16,float16,1023,0.028489598631858827
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,8,128,1,float16,fp8,1023,0.03039200007915497
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,8,128,1,float16,float16,2047,0.04904640018939972
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,8,128,1,float16,fp8,2047,0.03871839940547943
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,8,128,1,float16,float16,4095,0.07135679721832275
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,64,8,128,1,float16,fp8,4095,0.0568943977355957
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,1,128,1,float16,float16,1,0.07011839747428894
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,1,128,1,float16,fp8,1,0.07431039810180665
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,1,128,1,float16,float16,3,0.0700543999671936
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,1,128,1,float16,fp8,3,0.07414720058441163
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,1,128,1,float16,fp8,31,0.07546399831771851
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,1,128,1,float16,float16,7,0.07001280188560485
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,1,128,1,float16,fp8,7,0.07450559735298157
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,1,128,1,float16,float16,15,0.07283040285110473
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,1,128,1,float16,fp8,15,0.0742896020412445
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,1,128,1,float16,float16,31,0.07588639855384827
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,1,128,1,float16,float16,63,0.07975839972496032
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,2,128,1,float16,float16,3,0.09901120066642762
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,1,128,1,float16,fp8,63,0.08239039778709412
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,1,128,1,float16,float16,127,0.08459200263023377
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,2,128,1,float16,fp8,7,0.10833120346069336
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,2,128,1,float16,float16,15,0.1015023946762085
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,1,128,1,float16,fp8,127,0.08527680039405823
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,2,128,1,float16,float16,1,0.10045759677886963
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,2,128,1,float16,fp8,1,0.10892159938812256
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,2,128,1,float16,fp8,3,0.10758719444274903
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,2,128,1,float16,float16,7,0.09905279874801635
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,2,128,1,float16,fp8,15,0.10990079641342163
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,2,128,1,float16,float16,31,0.10443040132522582
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,2,128,1,float16,fp8,31,0.1140895962715149
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,2,128,1,float16,float16,63,0.10838719606399536
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,2,128,1,float16,fp8,63,0.11923040151596069
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,2,128,1,float16,float16,127,0.11722400188446044
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,2,128,1,float16,fp8,127,0.12264480590820312
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,4,128,1,float16,float16,1,0.15685440301895143
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,4,128,1,float16,fp8,1,0.17753440141677856
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,4,128,1,float16,float16,3,0.15739359855651855
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,4,128,1,float16,fp8,3,0.1784559965133667
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,4,128,1,float16,float16,7,0.15936959981918336
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,4,128,1,float16,fp8,7,0.17891680002212523
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,4,128,1,float16,float16,15,0.1611407995223999
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,4,128,1,float16,fp8,15,0.17880640029907227
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,4,128,1,float16,float16,31,0.16189119815826417
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,4,128,1,float16,fp8,31,0.1882383942604065
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,4,128,1,float16,float16,63,0.16563680171966552
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,4,128,1,float16,fp8,63,0.19099199771881104
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,4,128,1,float16,float16,127,0.17618240118026735
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,4,128,1,float16,fp8,127,0.19682559967041016
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,8,128,1,float16,float16,1,0.2770064115524292
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,8,128,1,float16,fp8,1,0.3158751964569092
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,8,128,1,float16,float16,3,0.27952640056610106
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,8,128,1,float16,fp8,3,0.3138256072998047
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,8,128,1,float16,float16,7,0.28071839809417726
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,8,128,1,float16,fp8,7,0.3212480068206787
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,8,128,1,float16,float16,15,0.2820127964019775
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,8,128,1,float16,fp8,15,0.32703518867492676
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,8,128,1,float16,float16,31,0.2846319913864136
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,8,128,1,float16,fp8,31,0.33822879791259763
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,8,128,1,float16,float16,63,0.28735039234161375
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,8,128,1,float16,fp8,127,0.37009921073913576
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,8,128,1,float16,fp8,63,0.3375056028366089
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,64,8,128,1,float16,float16,127,0.3075968027114868
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,1,128,1,float16,float16,1,0.12090879678726196
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,1,128,1,float16,fp8,1,0.13123680353164674
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,1,128,1,float16,float16,3,0.12145440578460694
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,1,128,1,float16,float16,15,0.12173279523849487
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,1,128,1,float16,fp8,3,0.13224480152130128
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,1,128,1,float16,float16,7,0.12200959920883178
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,1,128,1,float16,fp8,7,0.13202879428863526
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,1,128,1,float16,fp8,15,0.13252320289611816
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,1,128,1,float16,float16,31,0.12287039756774902
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,1,128,1,float16,fp8,31,0.13348640203475953
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,1,128,1,float16,float16,63,0.12685760259628295
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,1,128,1,float16,fp8,63,0.1367184042930603
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,2,128,1,float16,float16,1,0.1755136013031006
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,2,128,1,float16,fp8,1,0.19872159957885743
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,2,128,1,float16,float16,3,0.1738160014152527
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,2,128,1,float16,fp8,3,0.1982352018356323
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,2,128,1,float16,float16,7,0.1748960018157959
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,2,128,1,float16,fp8,7,0.19864799976348876
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,2,128,1,float16,float16,15,0.17502559423446656
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,2,128,1,float16,fp8,15,0.20110559463500977
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,2,128,1,float16,float16,31,0.17710880041122437
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,2,128,1,float16,fp8,31,0.2009648084640503
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,2,128,1,float16,float16,63,0.18184959888458252
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,2,128,1,float16,fp8,63,0.205679988861084
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,4,128,1,float16,float16,1,0.2912832021713257
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,4,128,1,float16,fp8,1,0.3418512105941772
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,4,128,1,float16,float16,15,0.2907104015350342
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,4,128,1,float16,float16,3,0.2898096084594727
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,4,128,1,float16,fp8,3,0.34139840602874755
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,4,128,1,float16,float16,7,0.28998239040374757
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,4,128,1,float16,fp8,7,0.3412928104400635
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,4,128,1,float16,fp8,15,0.3455375909805298
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,4,128,1,float16,float16,31,0.29547839164733886
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,4,128,1,float16,fp8,31,0.34596800804138184
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,4,128,1,float16,float16,63,0.2985440015792847
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,4,128,1,float16,fp8,63,0.35068159103393554
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,8,128,1,float16,float16,1,0.5239408016204834
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,8,128,1,float16,fp8,1,0.6328224182128906
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,8,128,1,float16,float16,3,0.5233808040618897
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,8,128,1,float16,fp8,3,0.6388671875
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,8,128,1,float16,float16,7,0.5288496017456055
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,8,128,1,float16,fp8,7,0.6367631912231445
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,8,128,1,float16,float16,15,0.5269728183746338
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,8,128,1,float16,fp8,15,0.6317168235778808
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,8,128,1,float16,float16,31,0.529040002822876
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,1,128,1,float16,float16,1,0.020503999292850496
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,8,128,1,float16,fp8,31,0.6321568012237548
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,1,128,1,float16,fp8,1,0.02109439969062805
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,8,128,1,float16,float16,63,0.5346144199371338
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,1,128,1,float16,float16,3,0.0207056000828743
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,1,128,1,float16,fp8,3,0.021059200167655945
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,64,8,128,1,float16,fp8,63,0.634335994720459
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,1,128,1,float16,float16,7,0.02022079974412918
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,1,128,1,float16,fp8,7,0.021388800442218782
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,1,128,1,float16,float16,15,0.020660799741744996
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,1,128,1,float16,fp8,15,0.02117439955472946
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,1,128,1,float16,float16,31,0.020151999592781068
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,1,128,1,float16,fp8,31,0.02147040069103241
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,1,128,1,float16,fp8,255,0.02306240051984787
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,1,128,1,float16,float16,63,0.020289599895477295
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,1,128,1,float16,fp8,63,0.020880000293254854
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,1,128,1,float16,float16,127,0.020393599569797517
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,1,128,1,float16,fp8,127,0.021396799385547637
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,1,128,1,float16,float16,255,0.022116799652576447
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,1,128,1,float16,float16,511,0.024928000569343568
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,1,128,1,float16,fp8,511,0.02729920148849487
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,1,128,1,float16,fp8,4095,0.030883198976516722
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,1,128,1,float16,float16,1023,0.025294399261474608
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,1,128,1,float16,fp8,1023,0.026558399200439453
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,1,128,1,float16,float16,2047,0.02720319926738739
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,1,128,1,float16,fp8,2047,0.027326399087905885
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,1,128,1,float16,float16,4095,0.033847999572753903
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,2,128,1,float16,float16,1,0.020428800582885744
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,2,128,1,float16,fp8,1,0.02144320011138916
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,2,128,1,float16,float16,3,0.02059199959039688
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,2,128,1,float16,fp8,3,0.021209600567817687
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,2,128,1,float16,float16,7,0.02016959935426712
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,2,128,1,float16,fp8,7,0.021315200626850127
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,2,128,1,float16,float16,15,0.020588800311088562
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,2,128,1,float16,fp8,15,0.02152319997549057
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,2,128,1,float16,float16,31,0.020526400208473204
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,2,128,1,float16,fp8,31,0.021359999477863312
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,2,128,1,float16,float16,63,0.020667199790477753
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,2,128,1,float16,fp8,63,0.021777600049972534
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,2,128,1,float16,float16,127,0.020508800446987153
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,2,128,1,float16,fp8,127,0.021296000480651854
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,2,128,1,float16,float16,255,0.022448000311851502
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,2,128,1,float16,fp8,255,0.02338559925556183
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,2,128,1,float16,float16,511,0.025659200549125672
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,2,128,1,float16,fp8,511,0.02701280117034912
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,2,128,1,float16,float16,1023,0.027427199482917785
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,2,128,1,float16,fp8,1023,0.026707199215888978
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,2,128,1,float16,float16,2047,0.031508800387382505
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,2,128,1,float16,fp8,2047,0.030836799740791322
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,2,128,1,float16,float16,4095,0.05040799975395203
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,2,128,1,float16,fp8,4095,0.03917120099067688
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,4,128,1,float16,float16,1,0.02096160054206848
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,4,128,1,float16,fp8,1,0.021715199947357176
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,4,128,1,float16,float16,3,0.020980800688266753
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,4,128,1,float16,fp8,3,0.02172800004482269
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,4,128,1,float16,float16,7,0.02077919989824295
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,4,128,1,float16,fp8,7,0.021459199488162994
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,4,128,1,float16,float16,15,0.020790399610996248
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,4,128,1,float16,fp8,15,0.02171040028333664
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,4,128,1,float16,float16,31,0.020904000103473663
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,4,128,1,float16,fp8,31,0.021592000126838685
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,4,128,1,float16,float16,63,0.020897600054740905
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,4,128,1,float16,fp8,63,0.02176959961652756
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,4,128,1,float16,float16,127,0.02070239931344986
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,4,128,1,float16,fp8,127,0.021721599996089934
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,4,128,1,float16,float16,255,0.022735999524593355
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,4,128,1,float16,fp8,255,0.024028800427913666
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,4,128,1,float16,float16,511,0.025753599405288697
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,4,128,1,float16,fp8,511,0.027486398816108704
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,4,128,1,float16,float16,1023,0.031116798520088196
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,4,128,1,float16,fp8,1023,0.03028160035610199
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,4,128,1,float16,float16,2047,0.050179201364517215
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,4,128,1,float16,fp8,2047,0.04062559902667999
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,4,128,1,float16,float16,4095,0.07222560048103333
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,4,128,1,float16,fp8,4095,0.057222402095794676
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,8,128,1,float16,float16,1,0.021007999777793884
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,8,128,1,float16,fp8,1,0.0221328005194664
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,8,128,1,float16,float16,3,0.021134400367736818
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,8,128,1,float16,fp8,3,0.02202720046043396
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,8,128,1,float16,float16,7,0.020788800716400147
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,8,128,1,float16,fp8,7,0.021998399496078493
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,8,128,1,float16,float16,15,0.02101760059595108
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,8,128,1,float16,fp8,15,0.0220208004117012
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,8,128,1,float16,float16,31,0.021147200465202333
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,8,128,1,float16,fp8,31,0.022071999311447144
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,8,128,1,float16,float16,63,0.021305599808692934
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,8,128,1,float16,fp8,63,0.021958400309085847
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,8,128,1,float16,float16,127,0.021273599565029146
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,8,128,1,float16,fp8,127,0.0220880001783371
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,8,128,1,float16,float16,255,0.022993600368499754
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,8,128,1,float16,fp8,255,0.024137599766254424
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,8,128,1,float16,float16,511,0.028387200832366944
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,8,128,1,float16,fp8,511,0.027956798672676086
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,8,128,1,float16,float16,1023,0.04880479872226715
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,8,128,1,float16,fp8,1023,0.038222399353981015
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,8,128,1,float16,float16,2047,0.0705183982849121
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,8,128,1,float16,fp8,2047,0.05599200129508972
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,8,128,1,float16,float16,4095,0.11373120546340942
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,1,128,1,float16,float16,1,0.022038400173187256
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,64,8,128,1,float16,fp8,4095,0.08092799782752991
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,1,128,1,float16,fp8,1,0.022889600694179536
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,1,128,1,float16,float16,3,0.02223999947309494
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,1,128,1,float16,fp8,3,0.022801600396633148
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,1,128,1,float16,float16,7,0.021830399334430695
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,1,128,1,float16,fp8,7,0.022788800299167633
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,1,128,1,float16,float16,15,0.02171040028333664
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,1,128,1,float16,fp8,15,0.022780799865722658
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,1,128,1,float16,float16,31,0.022009600698947907
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,1,128,1,float16,fp8,31,0.023030400276184082
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,1,128,1,float16,float16,63,0.022342400252819063
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,1,128,1,float16,fp8,63,0.022657600045204163
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,1,128,1,float16,float16,127,0.021950399875640868
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,1,128,1,float16,fp8,127,0.022782400250434875
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,1,128,1,float16,float16,255,0.023686400055885314
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,1,128,1,float16,fp8,255,0.02465759962797165
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,1,128,1,float16,float16,511,0.02696000039577484
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,1,128,1,float16,fp8,511,0.02842719852924347
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,1,128,1,float16,float16,1023,0.03297599852085113
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,1,128,1,float16,float16,8191,0.11845439672470093
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,1,128,1,float16,fp8,1023,0.03171359896659851
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,1,128,1,float16,float16,2047,0.051964801549911496
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,1,128,1,float16,fp8,2047,0.04307360053062439
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,1,128,1,float16,float16,4095,0.07345280051231384
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,1,128,1,float16,fp8,4095,0.05895360112190247
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,1,128,1,float16,fp8,8191,0.08360000252723694
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,2,128,1,float16,float16,1,0.022462399303913118
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,2,128,1,float16,fp8,1,0.023211200535297394
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,2,128,1,float16,float16,3,0.022217600047588347
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,2,128,1,float16,fp8,3,0.022814400494098663
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,2,128,1,float16,float16,7,0.022103999555110932
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,2,128,1,float16,fp8,7,0.02308479994535446
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,2,128,1,float16,float16,15,0.02239679992198944
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,2,128,1,float16,fp8,15,0.022892799973487855
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,2,128,1,float16,float16,31,0.02236959934234619
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,2,128,1,float16,fp8,31,0.02297919988632202
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,2,128,1,float16,float16,63,0.02221599966287613
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,2,128,1,float16,fp8,63,0.0230335995554924
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,2,128,1,float16,float16,127,0.022417600452899932
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,2,128,1,float16,fp8,127,0.023145599663257597
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,2,128,1,float16,float16,255,0.024439999461174013
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,2,128,1,float16,fp8,255,0.025494399666786193
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,2,128,1,float16,float16,511,0.02909280061721802
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,2,128,1,float16,fp8,511,0.028886398673057555
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,2,128,1,float16,float16,1023,0.0502128005027771
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,2,128,1,float16,fp8,1023,0.03748959898948669
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,2,128,1,float16,float16,2047,0.07160000205039978
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,2,128,1,float16,fp8,2047,0.05749120116233826
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,2,128,1,float16,float16,4095,0.116702401638031
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,2,128,1,float16,fp8,4095,0.08285120129585266
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,2,128,1,float16,float16,8191,0.2020927906036377
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,4,128,1,float16,float16,1,0.026025599241256712
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,2,128,1,float16,fp8,8191,0.13337119817733764
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,4,128,1,float16,fp8,7,0.02752319872379303
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,4,128,1,float16,float16,15,0.025889599323272706
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,4,128,1,float16,fp8,1,0.027596798539161683
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,4,128,1,float16,fp8,15,0.027673599123954774
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,4,128,1,float16,float16,3,0.0262719988822937
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,4,128,1,float16,fp8,3,0.02778880000114441
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,4,128,1,float16,float16,7,0.026056000590324403
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,4,128,1,float16,float16,31,0.025945600867271424
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,4,128,1,float16,fp8,31,0.027452799677848815
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,4,128,1,float16,float16,63,0.026086398959159852
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,4,128,1,float16,fp8,63,0.02765600085258484
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,4,128,1,float16,float16,127,0.026347199082374574
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,4,128,1,float16,fp8,127,0.027926400303840637
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,4,128,1,float16,float16,255,0.03221440017223358
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,4,128,1,float16,fp8,255,0.03150399923324585
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,4,128,1,float16,float16,511,0.04926399886608124
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,4,128,1,float16,fp8,511,0.042659199237823485
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,4,128,1,float16,float16,1023,0.07606880068778991
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,4,128,1,float16,fp8,1023,0.06192799806594849
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,4,128,1,float16,float16,2047,0.11876319646835327
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,4,128,1,float16,fp8,2047,0.08738080263137818
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,4,128,1,float16,float16,4095,0.20408480167388915
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,4,128,1,float16,fp8,4095,0.13723360300064086
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,8,128,1,float16,float16,1,0.03340800106525421
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,8,128,1,float16,float16,7,0.033308801054954526
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,8,128,1,float16,fp8,1,0.03657279908657074
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,4,128,1,float16,float16,8191,0.37335360050201416
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,8,128,1,float16,float16,3,0.03375200033187866
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,4,128,1,float16,fp8,8191,0.237825608253479
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,8,128,1,float16,fp8,3,0.03660959899425507
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,8,128,1,float16,fp8,7,0.03657279908657074
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,8,128,1,float16,float16,15,0.0333983987569809
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,8,128,1,float16,fp8,15,0.03647040128707886
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,8,128,1,float16,float16,31,0.03356960117816925
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,8,128,1,float16,fp8,31,0.036776000261306764
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,8,128,1,float16,float16,63,0.03398720026016235
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,8,128,1,float16,fp8,63,0.03649759888648987
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,8,128,1,float16,float16,127,0.03684319853782654
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,8,128,1,float16,float16,1023,0.12245440483093262
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,8,128,1,float16,fp8,127,0.03682880103588104
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,8,128,1,float16,float16,255,0.055009597539901735
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,8,128,1,float16,fp8,255,0.0477647989988327
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,8,128,1,float16,float16,511,0.0749135971069336
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,8,128,1,float16,fp8,511,0.06716480255126953
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,8,128,1,float16,fp8,1023,0.09563999772071838
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,8,128,1,float16,float16,2047,0.20729598999023438
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,8,128,1,float16,fp8,2047,0.14485759735107423
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,8,128,1,float16,float16,4095,0.3753632068634033
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,8,128,1,float16,fp8,4095,0.24198079109191895
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,1,128,1,float16,float16,1,0.01729599982500076
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,1,128,1,float16,fp8,1,0.018438400328159334
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,1,128,1,float16,float16,3,0.01765120029449463
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,1,128,1,float16,fp8,3,0.018481600284576415
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,8,128,1,float16,float16,8191,0.7172111988067627
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,32,8,128,1,float16,fp8,8191,0.44301600456237794
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,1,128,1,float16,float16,7,0.017454400658607483
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,1,128,1,float16,float16,15,0.01740960031747818
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,1,128,1,float16,fp8,7,0.018223999440670012
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,1,128,1,float16,fp8,15,0.018432000279426576
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,1,128,1,float16,float16,31,0.017577600479125977
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,1,128,1,float16,fp8,31,0.01854880005121231
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,1,128,1,float16,float16,63,0.017547200620174407
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,1,128,1,float16,fp8,63,0.01844639927148819
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,1,128,1,float16,float16,127,0.017345599830150604
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,1,128,1,float16,fp8,127,0.018131199479103088
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,1,128,1,float16,float16,255,0.018935999274253844
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,1,128,1,float16,fp8,255,0.020399999618530274
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,1,128,1,float16,float16,511,0.022152000665664674
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,1,128,1,float16,fp8,511,0.02438559979200363
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,1,128,1,float16,float16,1023,0.02157440036535263
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,1,128,1,float16,fp8,1023,0.02300799936056137
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,1,128,1,float16,float16,2047,0.022155199944972993
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,1,128,1,float16,fp8,2047,0.02322559952735901
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,1,128,1,float16,float16,4095,0.02284640073776245
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,1,128,1,float16,fp8,4095,0.023609599471092223
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,1,128,1,float16,float16,8191,0.024348799884319306
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,1,128,1,float16,fp8,8191,0.025360000133514405
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,2,128,1,float16,float16,1,0.018294399976730345
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,2,128,1,float16,fp8,1,0.01913599967956543
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,2,128,1,float16,float16,3,0.018038399517536163
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,2,128,1,float16,fp8,3,0.018990400433540344
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,2,128,1,float16,float16,7,0.018003199994564057
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,2,128,1,float16,fp8,7,0.01907680034637451
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,2,128,1,float16,float16,15,0.018379199504852294
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,2,128,1,float16,fp8,15,0.0192208006978035
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,2,128,1,float16,float16,31,0.018361599743366243
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,2,128,1,float16,fp8,31,0.0189983993768692
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,2,128,1,float16,float16,63,0.018111999332904815
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,2,128,1,float16,fp8,63,0.018958400189876556
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,2,128,1,float16,float16,127,0.018262399733066557
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,2,128,1,float16,fp8,127,0.01910240054130554
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,2,128,1,float16,float16,255,0.019942399859428406
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,2,128,1,float16,fp8,255,0.02128479927778244
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,2,128,1,float16,float16,511,0.02253919988870621
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,2,128,1,float16,fp8,511,0.0249439999461174
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,2,128,1,float16,float16,1023,0.02248159945011139
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,2,128,1,float16,fp8,1023,0.023830400407314302
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,2,128,1,float16,float16,2047,0.023185600340366364
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,2,128,1,float16,fp8,2047,0.02402079999446869
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,2,128,1,float16,float16,4095,0.023676800727844238
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,2,128,1,float16,fp8,4095,0.024963200092315674
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,2,128,1,float16,float16,8191,0.024798400700092316
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,2,128,1,float16,fp8,8191,0.025857600569725036
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,4,128,1,float16,float16,1,0.019921599328517912
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,4,128,1,float16,fp8,1,0.02088800072669983
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,4,128,1,float16,float16,3,0.020070399343967437
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,4,128,1,float16,fp8,3,0.020812800526618956
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,4,128,1,float16,float16,7,0.01987680047750473
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,4,128,1,float16,fp8,7,0.020803199708461763
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,4,128,1,float16,float16,15,0.019732800126075745
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,4,128,1,float16,fp8,15,0.02062080055475235
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,4,128,1,float16,float16,31,0.019896000623703003
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,4,128,1,float16,fp8,31,0.02107200026512146
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,4,128,1,float16,float16,63,0.019916799664497376
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,4,128,1,float16,fp8,63,0.020839999616146087
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,4,128,1,float16,float16,127,0.01989919990301132
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,4,128,1,float16,fp8,127,0.020585599541664123
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,4,128,1,float16,float16,255,0.021480000019073485
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,4,128,1,float16,fp8,255,0.022862400114536285
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,4,128,1,float16,float16,511,0.024590399861335755
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,4,128,1,float16,fp8,511,0.026664000749588013
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,4,128,1,float16,float16,1023,0.024216000735759736
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,4,128,1,float16,fp8,1023,0.025721600651741026
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,4,128,1,float16,float16,2047,0.024348799884319306
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,4,128,1,float16,fp8,2047,0.02589600086212158
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,4,128,1,float16,float16,4095,0.025551998615264894
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,4,128,1,float16,fp8,4095,0.026416000723838807
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,4,128,1,float16,float16,8191,0.027331200242042542
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,4,128,1,float16,fp8,8191,0.028428798913955687
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,8,128,1,float16,float16,1,0.019976000487804412
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,8,128,1,float16,fp8,1,0.020606400072574617
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,8,128,1,float16,float16,3,0.019896000623703003
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,8,128,1,float16,fp8,3,0.02064799964427948
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,8,128,1,float16,float16,7,0.020182399451732634
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,8,128,1,float16,fp8,7,0.02073120027780533
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,8,128,1,float16,float16,15,0.020059199631214143
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,8,128,1,float16,fp8,15,0.021201600134372712
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,8,128,1,float16,float16,31,0.02017119973897934
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,8,128,1,float16,fp8,255,0.023192000389099122
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,8,128,1,float16,fp8,31,0.02075680047273636
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,8,128,1,float16,float16,63,0.019857600331306458
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,8,128,1,float16,fp8,63,0.020659199357032774
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,8,128,1,float16,float16,127,0.020168000459671022
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,8,128,1,float16,fp8,127,0.020865599811077117
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,8,128,1,float16,float16,255,0.02144639939069748
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,8,128,1,float16,float16,511,0.024715200066566467
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,8,128,1,float16,fp8,511,0.027188798785209654
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,8,128,1,float16,float16,1023,0.024297599494457246
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,8,128,1,float16,fp8,1023,0.025803199410438536
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,8,128,1,float16,float16,2047,0.024929599463939668
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,8,128,1,float16,fp8,2047,0.026310399174690247
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,8,128,1,float16,float16,4095,0.026748800277709962
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,8,128,1,float16,fp8,4095,0.0271807998418808
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,1,128,1,float16,fp8,3,0.019364799559116363
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,8,128,1,float16,float16,8191,0.031302401423454286
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,32,8,128,1,float16,fp8,8191,0.03120799958705902
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,1,128,1,float16,float16,1,0.01834080070257187
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,1,128,1,float16,fp8,1,0.01910399943590164
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,1,128,1,float16,float16,3,0.018195199966430663
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,1,128,1,float16,float16,7,0.018452799320220946
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,1,128,1,float16,fp8,7,0.019728000462055206
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,1,128,1,float16,float16,15,0.01828639954328537
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,1,128,1,float16,fp8,15,0.019393600523471832
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,1,128,1,float16,float16,31,0.018243199586868285
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,1,128,1,float16,fp8,31,0.019227199256420135
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,1,128,1,float16,float16,63,0.018335999548435213
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,1,128,1,float16,fp8,63,0.019283199310302736
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,1,128,1,float16,float16,127,0.01844639927148819
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,1,128,1,float16,fp8,127,0.019215999543666838
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,1,128,1,float16,float16,255,0.020108799636363982
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,1,128,1,float16,fp8,255,0.02123039960861206
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,1,128,1,float16,float16,511,0.022881600260734557
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,1,128,1,float16,fp8,511,0.025094398856163026
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,1,128,1,float16,float16,1023,0.022815999388694764
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,1,128,1,float16,fp8,1023,0.023815999925136565
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,1,128,1,float16,float16,2047,0.023081600666046143
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,1,128,1,float16,fp8,2047,0.02417600005865097
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,1,128,1,float16,float16,4095,0.02402079999446869
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,1,128,1,float16,fp8,4095,0.02500160038471222
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,1,128,1,float16,float16,8191,0.025655999779701233
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,1,128,1,float16,fp8,8191,0.026675200462341307
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,2,128,1,float16,float16,1,0.01982560008764267
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,2,128,1,float16,fp8,1,0.020769600570201874
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,2,128,1,float16,float16,3,0.020185600221157073
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,2,128,1,float16,fp8,3,0.021070399880409242
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,2,128,1,float16,float16,7,0.02019519954919815
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,2,128,1,float16,fp8,7,0.021003200113773345
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,2,128,1,float16,float16,15,0.020187200605869295
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,2,128,1,float16,fp8,15,0.02072640061378479
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,2,128,1,float16,float16,31,0.02009280025959015
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,2,128,1,float16,fp8,31,0.020895999670028687
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,2,128,1,float16,float16,63,0.020129600167274476
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,2,128,1,float16,fp8,63,0.021033599972724915
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,2,128,1,float16,float16,511,0.024872000515460967
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,2,128,1,float16,float16,127,0.020043200254440306
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,2,128,1,float16,fp8,127,0.020820799469947814
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,2,128,1,float16,float16,255,0.02162559926509857
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,2,128,1,float16,fp8,255,0.022841599583625794
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,2,128,1,float16,fp8,2047,0.026179200410842894
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,2,128,1,float16,fp8,511,0.02667360007762909
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,2,128,1,float16,float16,1023,0.024659200012683867
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,2,128,1,float16,fp8,1023,0.025910401344299318
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,2,128,1,float16,float16,2047,0.024959999322891235
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,2,128,1,float16,float16,4095,0.025798401236534117
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,2,128,1,float16,fp8,4095,0.026895999908447266
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,4,128,1,float16,float16,3,0.020319999754428865
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,4,128,1,float16,fp8,3,0.020747199654579163
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,2,128,1,float16,float16,8191,0.02776640057563782
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,2,128,1,float16,fp8,8191,0.02863680124282837
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,4,128,1,float16,float16,1,0.0200080007314682
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,4,128,1,float16,fp8,1,0.021115200221538545
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,4,128,1,float16,float16,7,0.02011519968509674
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,4,128,1,float16,fp8,7,0.021137599647045136
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,4,128,1,float16,float16,15,0.019945600628852846
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,4,128,1,float16,fp8,15,0.020747199654579163
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,4,128,1,float16,float16,31,0.020337599515914916
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,4,128,1,float16,fp8,31,0.020931200683116914
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,4,128,1,float16,float16,63,0.020108799636363982
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,4,128,1,float16,fp8,63,0.02086720019578934
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,4,128,1,float16,float16,127,0.020377600193023683
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,4,128,1,float16,fp8,127,0.02067359983921051
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,4,128,1,float16,float16,255,0.021532799303531646
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,4,128,1,float16,fp8,255,0.023086400330066682
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,4,128,1,float16,float16,511,0.024751999974250795
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,4,128,1,float16,fp8,511,0.02666560113430023
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,4,128,1,float16,float16,1023,0.024508799612522125
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,4,128,1,float16,fp8,1023,0.02614560127258301
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,4,128,1,float16,fp8,2047,0.026080000400543212
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,4,128,1,float16,float16,2047,0.02502399981021881
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,4,128,1,float16,float16,4095,0.026748800277709962
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,4,128,1,float16,fp8,4095,0.02773439884185791
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,4,128,1,float16,float16,8191,0.031123200058937074
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,4,128,1,float16,fp8,8191,0.03128960132598877
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,8,128,1,float16,float16,1,0.020161600410938264
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,8,128,1,float16,fp8,1,0.02128320038318634
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,8,128,1,float16,float16,3,0.019990399479866028
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,8,128,1,float16,fp8,31,0.021334399282932282
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,8,128,1,float16,fp8,3,0.020996800065040587
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,8,128,1,float16,float16,7,0.020160000026226043
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,8,128,1,float16,fp8,7,0.02096319943666458
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,8,128,1,float16,float16,15,0.02028159946203232
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,8,128,1,float16,fp8,15,0.021076799929142
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,8,128,1,float16,float16,31,0.02030239999294281
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,8,128,1,float16,float16,63,0.02012320011854172
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,8,128,1,float16,fp8,63,0.021324799954891206
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,8,128,1,float16,float16,127,0.020310400426387785
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,8,128,1,float16,fp8,127,0.020972800254821778
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,8,128,1,float16,float16,255,0.022019200026988983
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,8,128,1,float16,fp8,255,0.023083199560642243
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,8,128,1,float16,float16,511,0.024881599843502043
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,8,128,1,float16,fp8,511,0.026657599210739135
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,8,128,1,float16,float16,1023,0.02463040053844452
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,8,128,1,float16,fp8,1023,0.02640959918498993
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,8,128,1,float16,float16,2047,0.026286399364471434
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,8,128,1,float16,fp8,2047,0.026915198564529418
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,8,128,1,float16,float16,4095,0.03059839904308319
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,8,128,1,float16,fp8,4095,0.030884799361228944
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,8,128,1,float16,float16,8191,0.0486624002456665
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,32,8,128,1,float16,fp8,8191,0.03873760104179382
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,1,128,1,float16,float16,1,0.021038399636745454
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,1,128,1,float16,fp8,1,0.021955199539661407
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,1,128,1,float16,float16,3,0.020812800526618956
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,1,128,1,float16,fp8,3,0.021678400039672852
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,1,128,1,float16,float16,7,0.021345600485801697
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,1,128,1,float16,fp8,7,0.022060799598693847
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,1,128,1,float16,float16,15,0.02128159999847412
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,1,128,1,float16,fp8,15,0.021835200488567352
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,1,128,1,float16,float16,31,0.021004800498485566
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,1,128,1,float16,fp8,31,0.022040000557899474
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,1,128,1,float16,float16,63,0.02110079973936081
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,1,128,1,float16,fp8,63,0.021964800357818604
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,1,128,1,float16,float16,127,0.021823999285697938
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,1,128,1,float16,fp8,127,0.02203039973974228
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,1,128,1,float16,float16,255,0.023204800486564637
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,1,128,1,float16,fp8,255,0.023932799696922302
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,1,128,1,float16,float16,511,0.025707200169563293
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,1,128,1,float16,fp8,511,0.027811199426651
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,1,128,1,float16,float16,1023,0.027294400334358215
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,1,128,1,float16,fp8,1023,0.02746720016002655
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,1,128,1,float16,float16,2047,0.030820798873901368
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,1,128,1,float16,fp8,2047,0.031516799330711366
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,1,128,1,float16,float16,4095,0.050888001918792725
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,1,128,1,float16,fp8,4095,0.04266560077667236
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,1,128,1,float16,float16,8191,0.07204959988594055
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,1,128,1,float16,fp8,8191,0.05701919794082642
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,2,128,1,float16,float16,1,0.021279999613761903
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,2,128,1,float16,fp8,1,0.02218559980392456
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,2,128,1,float16,float16,3,0.021345600485801697
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,2,128,1,float16,fp8,3,0.02211360037326813
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,2,128,1,float16,float16,7,0.021265600621700288
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,2,128,1,float16,fp8,7,0.021779200434684752
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,2,128,1,float16,float16,15,0.021267199516296388
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,2,128,1,float16,fp8,15,0.022227199375629426
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,2,128,1,float16,float16,31,0.021436800062656403
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,2,128,1,float16,fp8,31,0.022275200486183165
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,2,128,1,float16,float16,63,0.02136480063199997
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,2,128,1,float16,fp8,63,0.021971200406551362
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,2,128,1,float16,float16,127,0.021198399364948273
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,2,128,1,float16,fp8,127,0.02221599966287613
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,2,128,1,float16,float16,255,0.023038400709629057
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,2,128,1,float16,fp8,255,0.024270400404930115
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,2,128,1,float16,float16,511,0.02638559937477112
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,2,128,1,float16,fp8,511,0.027801600098609925
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,2,128,1,float16,float16,1023,0.03149600028991699
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,2,128,1,float16,fp8,1023,0.030686399340629576
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,2,128,1,float16,float16,2047,0.05046719908714294
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,2,128,1,float16,fp8,2047,0.03971680104732513
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,2,128,1,float16,float16,4095,0.0721776008605957
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,2,128,1,float16,fp8,4095,0.05837759971618652
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,2,128,1,float16,float16,8191,0.11726080179214478
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,2,128,1,float16,fp8,8191,0.0825984001159668
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,4,128,1,float16,float16,1,0.021505600214004515
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,4,128,1,float16,fp8,1,0.022492800652980805
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,4,128,1,float16,float16,3,0.021515199542045595
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,4,128,1,float16,fp8,3,0.02250880002975464
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,4,128,1,float16,float16,7,0.021649600565433504
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,4,128,1,float16,fp8,7,0.022358399629592896
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,4,128,1,float16,float16,15,0.021456000208854676
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,4,128,1,float16,fp8,15,0.022436800599098205
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,4,128,1,float16,float16,31,0.0216511994600296
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,4,128,1,float16,fp8,31,0.02253919988870621
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,4,128,1,float16,float16,63,0.0217631995677948
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,4,128,1,float16,fp8,63,0.02251359969377518
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,4,128,1,float16,float16,127,0.021796800196170807
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,4,128,1,float16,fp8,127,0.02264160066843033
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,4,128,1,float16,fp8,1023,0.04152320027351379
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,4,128,1,float16,float16,255,0.023451200127601622
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,4,128,1,float16,fp8,255,0.024583999812602998
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,4,128,1,float16,float16,511,0.02691679894924164
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,4,128,1,float16,fp8,511,0.02852480113506317
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,4,128,1,float16,float16,1023,0.04894079864025116
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,4,128,1,float16,float16,2047,0.07099360227584839
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,4,128,1,float16,fp8,2047,0.05610079765319824
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,4,128,1,float16,float16,4095,0.11467519998550416
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,4,128,1,float16,fp8,4095,0.0813968002796173
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,8,128,1,float16,float16,1,0.025411200523376466
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,4,128,1,float16,fp8,8191,0.13309600353240966
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,4,128,1,float16,float16,8191,0.19958720207214356
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,8,128,1,float16,fp8,1,0.027001601457595826
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,8,128,1,float16,float16,3,0.02547839879989624
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,8,128,1,float16,fp8,3,0.026825600862503053
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,8,128,1,float16,float16,7,0.025417599081993102
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,8,128,1,float16,fp8,7,0.026659199595451356
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,8,128,1,float16,float16,15,0.02534720003604889
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,8,128,1,float16,fp8,15,0.02687999904155731
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,8,128,1,float16,float16,31,0.02553919851779938
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,8,128,1,float16,fp8,31,0.026953598856925963
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,8,128,1,float16,fp8,255,0.03089280128479004
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,8,128,1,float16,float16,63,0.025515198707580566
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,8,128,1,float16,fp8,63,0.02694239914417267
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,8,128,1,float16,float16,127,0.02582240104675293
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,8,128,1,float16,fp8,127,0.026736000180244447
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,8,128,1,float16,float16,255,0.03141280114650726
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,8,128,1,float16,float16,511,0.04795680046081543
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,8,128,1,float16,fp8,511,0.042403200268745424
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,8,128,1,float16,float16,1023,0.07444000244140625
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,8,128,1,float16,fp8,1023,0.060899198055267334
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,8,128,1,float16,float16,2047,0.11612639427185059
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,8,128,1,float16,fp8,2047,0.0855679988861084
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,8,128,1,float16,float16,4095,0.20061919689178467
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,8,128,1,float16,fp8,4095,0.13670239448547364
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,1,128,1,float16,float16,1,0.020075200498104094
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,1,128,1,float16,fp8,1,0.02107200026512146
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,8,128,1,float16,float16,8191,0.3727216005325317
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,32,8,128,1,float16,fp8,8191,0.23590879440307616
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,1,128,1,float16,float16,3,0.02014559954404831
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,1,128,1,float16,fp8,3,0.020817600190639496
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,1,128,1,float16,float16,7,0.02001439929008484
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,1,128,1,float16,fp8,7,0.020838400721549986
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,1,128,1,float16,float16,15,0.019996799528598785
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,1,128,1,float16,fp8,15,0.021115200221538545
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,1,128,1,float16,float16,31,0.02011840045452118
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,1,128,1,float16,fp8,31,0.02102559953927994
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,1,128,1,float16,float16,63,0.020032000541687012
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,1,128,1,float16,fp8,63,0.020785599946975708
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,1,128,1,float16,float16,127,0.019908800721168518
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,1,128,1,float16,fp8,127,0.020580799877643587
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,1,128,1,float16,float16,255,0.021712000668048858
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,1,128,1,float16,fp8,255,0.023468799889087677
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,1,128,1,float16,float16,511,0.024636800587177276
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,1,128,1,float16,fp8,511,0.02671839892864227
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,1,128,1,float16,float16,1023,0.02463199943304062
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,1,128,1,float16,fp8,1023,0.02592639923095703
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,1,128,1,float16,float16,2047,0.024910399317741395
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,1,128,1,float16,fp8,2047,0.02605760097503662
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,1,128,1,float16,float16,4095,0.02569119930267334
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,1,128,1,float16,fp8,4095,0.026943999528884887
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,1,128,1,float16,float16,8191,0.02816160023212433
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,1,128,1,float16,fp8,8191,0.028545600175857545
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,2,128,1,float16,float16,1,0.0202224001288414
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,2,128,1,float16,fp8,1,0.020614400506019592
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,2,128,1,float16,float16,3,0.020265600085258482
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,2,128,1,float16,fp8,3,0.020937600731849672
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,2,128,1,float16,float16,7,0.02011680006980896
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,2,128,1,float16,float16,63,0.020337599515914916
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,2,128,1,float16,fp8,7,0.02106879949569702
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,2,128,1,float16,float16,15,0.020105600357055664
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,2,128,1,float16,fp8,15,0.02099200040102005
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,2,128,1,float16,float16,31,0.01979999989271164
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,2,128,1,float16,fp8,31,0.021078400313854218
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,2,128,1,float16,fp8,63,0.02114879935979843
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,2,128,1,float16,float16,127,0.020295999944210052
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,2,128,1,float16,fp8,127,0.021107199788093566
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,2,128,1,float16,float16,255,0.021804800629615782
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,2,128,1,float16,fp8,255,0.022891199588775633
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,2,128,1,float16,float16,511,0.024694399535655977
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,2,128,1,float16,fp8,511,0.02664479911327362
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,2,128,1,float16,float16,1023,0.024564799666404725
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,2,128,1,float16,fp8,1023,0.026025599241256712
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,2,128,1,float16,float16,2047,0.02550080120563507
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,2,128,1,float16,fp8,2047,0.0263264000415802
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,2,128,1,float16,float16,4095,0.027249601483345032
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,2,128,1,float16,fp8,4095,0.027318400144577027
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,2,128,1,float16,float16,8191,0.033057600259780884
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,2,128,1,float16,fp8,8191,0.03130399882793426
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,4,128,1,float16,float16,1,0.02035360038280487
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,4,128,1,float16,fp8,1,0.021190400421619415
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,4,128,1,float16,float16,3,0.02009759992361069
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,4,128,1,float16,fp8,3,0.021084800362586975
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,4,128,1,float16,float16,7,0.019963200390338897
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,4,128,1,float16,fp8,7,0.020934399962425233
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,4,128,1,float16,float16,15,0.020319999754428865
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,4,128,1,float16,fp8,15,0.021083199977874757
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,4,128,1,float16,float16,31,0.020420800149440765
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,4,128,1,float16,fp8,31,0.02082560062408447
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,4,128,1,float16,float16,63,0.02035679966211319
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,4,128,1,float16,fp8,63,0.02069759964942932
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,4,128,1,float16,float16,127,0.02011200040578842
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,4,128,1,float16,fp8,127,0.02109760046005249
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,4,128,1,float16,float16,255,0.022316800057888032
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,4,128,1,float16,fp8,255,0.023193599283695222
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,4,128,1,float16,float16,511,0.024934400618076325
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,4,128,1,float16,fp8,511,0.026788800954818726
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,4,128,1,float16,float16,1023,0.024616000056266785
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,4,128,1,float16,fp8,1023,0.02606880068778992
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,4,128,1,float16,float16,2047,0.026313599944114686
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,4,128,1,float16,fp8,2047,0.027011200785636902
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,4,128,1,float16,float16,4095,0.03206239938735962
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,4,128,1,float16,fp8,4095,0.03095200061798096
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,4,128,1,float16,float16,8191,0.04951840043067932
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,4,128,1,float16,fp8,8191,0.040803200006484984
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,8,128,1,float16,float16,1,0.02021760046482086
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,8,128,1,float16,fp8,1,0.020980800688266753
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,8,128,1,float16,float16,3,0.020363199710845947
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,8,128,1,float16,fp8,3,0.021460799872875212
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,8,128,1,float16,float16,7,0.020678399503231047
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,8,128,1,float16,fp8,7,0.021401600539684297
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,8,128,1,float16,float16,15,0.020448000729084016
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,8,128,1,float16,fp8,15,0.021294400095939636
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,8,128,1,float16,float16,31,0.020252799987792967
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,8,128,1,float16,fp8,31,0.02101919949054718
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,8,128,1,float16,float16,63,0.020216000080108643
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,8,128,1,float16,fp8,63,0.02146880030632019
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,8,128,1,float16,float16,127,0.02078080028295517
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,8,128,1,float16,fp8,127,0.02146400064229965
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,8,128,1,float16,float16,255,0.02218559980392456
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,8,128,1,float16,fp8,255,0.02337760031223297
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,8,128,1,float16,float16,511,0.02524479925632477
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,8,128,1,float16,fp8,511,0.02715519964694977
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,8,128,1,float16,float16,1023,0.026276800036430358
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,8,128,1,float16,fp8,1023,0.026815998554229736
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,8,128,1,float16,float16,2047,0.028593599796295166
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,8,128,1,float16,fp8,2047,0.030668801069259642
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,8,128,1,float16,float16,4095,0.048449599742889406
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,8,128,1,float16,fp8,4095,0.0406495988368988
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,8,128,1,float16,float16,8191,0.07027199864387512
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,32,8,128,1,float16,fp8,8191,0.0565775990486145
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,1,128,1,float16,float16,1,0.023921599984169005
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,1,128,1,float16,fp8,1,0.024297599494457246
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,1,128,1,float16,float16,3,0.023574399948120116
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,1,128,1,float16,fp8,3,0.02454400062561035
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,1,128,1,float16,float16,7,0.023414400219917298
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,1,128,1,float16,fp8,7,0.02451840043067932
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,1,128,1,float16,float16,15,0.0234607994556427
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,1,128,1,float16,fp8,15,0.024438400566577912
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,1,128,1,float16,float16,31,0.02422879934310913
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,1,128,1,float16,fp8,31,0.024297599494457246
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,1,128,1,float16,float16,63,0.02361920028924942
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,1,128,1,float16,fp8,63,0.024481600522994994
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,1,128,1,float16,float16,127,0.023800000548362732
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,1,128,1,float16,fp8,127,0.024726399779319765
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,1,128,1,float16,float16,255,0.026283198595047
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,1,128,1,float16,fp8,255,0.026228800415992737
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,1,128,1,float16,float16,511,0.03144319951534271
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,1,128,1,float16,fp8,511,0.030103999376296996
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,1,128,1,float16,float16,1023,0.051585602760314944
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,1,128,1,float16,fp8,1023,0.04280799925327301
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,1,128,1,float16,float16,2047,0.07333120107650756
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,1,128,1,float16,fp8,2047,0.05873280167579651
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,1,128,1,float16,float16,4095,0.11744159460067749
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,1,128,1,float16,fp8,4095,0.0839024007320404
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,2,128,1,float16,float16,1,0.027497598528862
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,2,128,1,float16,fp8,1,0.029135999083518983
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,2,128,1,float16,float16,3,0.027305600047111512
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,2,128,1,float16,fp8,3,0.029054400324821473
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,2,128,1,float16,float16,7,0.027511999011039734
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,2,128,1,float16,fp8,7,0.02928000092506409
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,2,128,1,float16,float16,15,0.027265599370002745
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,2,128,1,float16,fp8,15,0.028998398780822755
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,2,128,1,float16,float16,31,0.027459201216697694
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,2,128,1,float16,fp8,31,0.02900800108909607
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,2,128,1,float16,float16,63,0.027580800652503967
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,2,128,1,float16,fp8,63,0.029278400540351867
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,2,128,1,float16,float16,127,0.02720640003681183
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,2,128,1,float16,fp8,127,0.029232001304626463
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,2,128,1,float16,float16,255,0.03395360112190247
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,2,128,1,float16,fp8,255,0.03308959901332855
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,2,128,1,float16,float16,2047,0.12051680088043212
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,2,128,1,float16,float16,511,0.05193600058555603
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,2,128,1,float16,fp8,511,0.044865599274635314
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,2,128,1,float16,float16,1023,0.07787200212478637
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,2,128,1,float16,fp8,1023,0.06349599957466126
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,2,128,1,float16,fp8,2047,0.08854079842567444
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,2,128,1,float16,float16,4095,0.2084575891494751
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,4,128,1,float16,float16,1,0.03486399948596954
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,2,128,1,float16,fp8,4095,0.13880800008773803
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,4,128,1,float16,fp8,1,0.038247999548912046
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,4,128,1,float16,float16,3,0.03518880009651184
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,4,128,1,float16,fp8,3,0.037883201241493226
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,4,128,1,float16,float16,7,0.0350383996963501
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,4,128,1,float16,fp8,7,0.03835360109806061
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,4,128,1,float16,float16,15,0.034832000732421875
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,4,128,1,float16,fp8,15,0.03835360109806061
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,4,128,1,float16,float16,31,0.03490720093250275
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,4,128,1,float16,fp8,31,0.038422399759292604
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,4,128,1,float16,float16,63,0.03462400138378143
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,4,128,1,float16,fp8,63,0.03819040060043335
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,4,128,1,float16,float16,127,0.03849759995937348
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,4,128,1,float16,fp8,127,0.038159999251365664
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,4,128,1,float16,float16,255,0.0567471981048584
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,4,128,1,float16,fp8,255,0.05090240240097046
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,4,128,1,float16,float16,511,0.07709280252456666
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,4,128,1,float16,fp8,511,0.06798239946365356
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,4,128,1,float16,float16,1023,0.12428959608078002
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,8,128,1,float16,float16,1,0.04927360117435455
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,4,128,1,float16,fp8,1023,0.09682880043983459
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,4,128,1,float16,fp8,2047,0.14613280296325684
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,4,128,1,float16,float16,2047,0.20979039669036864
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,8,128,1,float16,fp8,1,0.055264002084732054
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,4,128,1,float16,float16,4095,0.3776576042175293
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,8,128,1,float16,float16,15,0.049332800507545474
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,4,128,1,float16,fp8,4095,0.24583840370178223
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,8,128,1,float16,float16,3,0.04935680031776428
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,8,128,1,float16,fp8,31,0.05595679879188538
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,8,128,1,float16,fp8,3,0.05575039982795715
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,8,128,1,float16,float16,7,0.04949919879436493
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,8,128,1,float16,fp8,7,0.05564960241317749
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,8,128,1,float16,fp8,15,0.05538560152053833
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,8,128,1,float16,float16,31,0.04975999891757965
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,8,128,1,float16,float16,63,0.05189759731292724
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,8,128,1,float16,fp8,63,0.055687999725341795
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,8,128,1,float16,float16,127,0.061868798732757566
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,8,128,1,float16,fp8,127,0.06032159924507141
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,8,128,1,float16,float16,255,0.0858784019947052
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,8,128,1,float16,fp8,255,0.08034560084342957
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,8,128,1,float16,float16,511,0.12591040134429932
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,8,128,1,float16,fp8,511,0.10966880321502685
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,8,128,1,float16,float16,1023,0.21569440364837647
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,8,128,1,float16,fp8,1023,0.16200480461120606
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,8,128,1,float16,float16,2047,0.3851567983627319
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,8,128,1,float16,fp8,2047,0.2582672119140625
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,1,128,1,float16,float16,1,0.03001280128955841
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,1,128,1,float16,fp8,1,0.03189600110054016
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,1,128,1,float16,float16,3,0.030623999238014222
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,1,128,1,float16,fp8,3,0.03211359977722168
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,8,128,1,float16,fp8,4095,0.4591072082519531
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,32,8,128,1,float16,float16,4095,0.7204271793365479
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,1,128,1,float16,float16,7,0.03044320046901703
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,1,128,1,float16,fp8,7,0.03252159953117371
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,1,128,1,float16,float16,15,0.03065760135650635
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,1,128,1,float16,fp8,15,0.0317984014749527
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,1,128,1,float16,float16,31,0.030796799063682555
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,1,128,1,float16,fp8,31,0.031934401392936705
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,1,128,1,float16,float16,63,0.03039039969444275
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,1,128,1,float16,float16,127,0.030883198976516722
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,1,128,1,float16,fp8,63,0.032662400603294374
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,1,128,1,float16,fp8,127,0.031974399089813234
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,1,128,1,float16,float16,255,0.03774400055408478
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,1,128,1,float16,fp8,255,0.035872000455856326
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,1,128,1,float16,float16,511,0.056462401151657106
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,1,128,1,float16,fp8,511,0.04752320051193237
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,1,128,1,float16,float16,1023,0.08258079886436462
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,1,128,1,float16,fp8,1023,0.06788960099220276
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,2,128,1,float16,float16,1,0.03793280124664307
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,2,128,1,float16,fp8,1,0.04050399959087372
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,2,128,1,float16,float16,3,0.03773280084133148
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,2,128,1,float16,fp8,3,0.04084640145301819
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,2,128,1,float16,float16,7,0.0377263993024826
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,2,128,1,float16,fp8,7,0.040982401371002196
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,2,128,1,float16,float16,15,0.037775999307632445
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,2,128,1,float16,fp8,15,0.040652799606323245
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,2,128,1,float16,float16,31,0.03747040033340454
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,2,128,1,float16,fp8,31,0.04080640077590943
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,2,128,1,float16,float16,63,0.03797439932823181
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,2,128,1,float16,fp8,63,0.04089600145816803
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,2,128,1,float16,float16,127,0.04191839993000031
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,2,128,1,float16,fp8,127,0.0410863995552063
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,2,128,1,float16,float16,255,0.06049280166625977
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,2,128,1,float16,fp8,255,0.05278559923171997
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,2,128,1,float16,float16,511,0.0824176013469696
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,2,128,1,float16,fp8,511,0.07118719816207886
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,2,128,1,float16,float16,1023,0.13016799688339234
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,2,128,1,float16,fp8,1023,0.10021120309829712
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,4,128,1,float16,float16,1,0.05219519734382629
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,4,128,1,float16,fp8,1,0.05823839902877807
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,4,128,1,float16,float16,3,0.05184800028800964
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,4,128,1,float16,fp8,3,0.05856159925460815
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,4,128,1,float16,float16,7,0.05255680084228516
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,4,128,1,float16,fp8,31,0.058064001798629764
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,4,128,1,float16,fp8,7,0.058455997705459596
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,4,128,1,float16,float16,15,0.052369600534439086
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,4,128,1,float16,fp8,15,0.05865280032157898
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,4,128,1,float16,float16,31,0.05249119997024536
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,4,128,1,float16,float16,63,0.05657280087471008
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,4,128,1,float16,fp8,63,0.05867840051651001
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,4,128,1,float16,float16,127,0.06479679942131042
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,4,128,1,float16,fp8,127,0.06432960033416749
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,4,128,1,float16,float16,255,0.08723999857902527
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,4,128,1,float16,fp8,255,0.0830735981464386
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,4,128,1,float16,float16,511,0.13083360195159913
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,4,128,1,float16,fp8,511,0.11224000453948975
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,4,128,1,float16,float16,1023,0.21988799571990966
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,4,128,1,float16,fp8,1023,0.16469919681549072
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,8,128,1,float16,float16,1,0.0807856023311615
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,8,128,1,float16,fp8,7,0.09375200271606446
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,8,128,1,float16,fp8,1,0.09419199824333191
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,8,128,1,float16,float16,3,0.0804032027721405
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,8,128,1,float16,fp8,3,0.09411200284957885
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,8,128,1,float16,float16,7,0.08071680068969726
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,8,128,1,float16,float16,15,0.08013439774513245
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,8,128,1,float16,fp8,15,0.0939743995666504
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,8,128,1,float16,float16,31,0.08098880052566529
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,8,128,1,float16,fp8,31,0.09409919977188111
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,8,128,1,float16,float16,63,0.09078720211982727
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,8,128,1,float16,fp8,63,0.10005760192871094
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,8,128,1,float16,float16,127,0.10023679733276367
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,8,128,1,float16,fp8,127,0.11019999980926513
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,8,128,1,float16,float16,255,0.1433568000793457
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,8,128,1,float16,fp8,255,0.13426079750061035
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,8,128,1,float16,float16,511,0.2275696039199829
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,8,128,1,float16,fp8,511,0.19435839653015136
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,1,128,1,float16,float16,1,0.04483200013637543
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,8,128,1,float16,float16,1023,0.3925679922103882
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,1,128,1,float16,fp8,1,0.047896000742912295
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,1,128,1,float16,float16,3,0.04489920139312744
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,32,8,128,1,float16,fp8,1023,0.29577438831329345
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,1,128,1,float16,fp8,3,0.0480320006608963
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,1,128,1,float16,fp8,31,0.04789760112762451
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,1,128,1,float16,float16,7,0.0447952002286911
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,1,128,1,float16,fp8,7,0.0481440007686615
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,1,128,1,float16,float16,15,0.04476799964904785
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,1,128,1,float16,float16,31,0.044772800803184507
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,1,128,1,float16,fp8,15,0.04789440035820007
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,1,128,1,float16,float16,63,0.044843199849128726
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,1,128,1,float16,fp8,63,0.04811840057373047
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,1,128,1,float16,float16,127,0.05268160104751587
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,1,128,1,float16,fp8,127,0.04880160093307495
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,2,128,1,float16,fp8,1,0.06516960263252258
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,1,128,1,float16,float16,255,0.06761599779129028
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,1,128,1,float16,fp8,255,0.06193119883537292
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,1,128,1,float16,float16,511,0.09049280285835266
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,1,128,1,float16,fp8,511,0.07887679934501649
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,2,128,1,float16,float16,1,0.05874400138854981
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,2,128,1,float16,float16,3,0.05856159925460815
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,2,128,1,float16,fp8,3,0.06495839953422547
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,2,128,1,float16,float16,7,0.05925920009613037
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,2,128,1,float16,fp8,7,0.06461920142173767
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,2,128,1,float16,float16,15,0.05914400219917297
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,2,128,1,float16,fp8,15,0.06493279933929444
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,2,128,1,float16,float16,31,0.059569597244262695
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,2,128,1,float16,fp8,31,0.06533120274543762
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,2,128,1,float16,fp8,255,0.08896480202674865
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,2,128,1,float16,float16,63,0.0652127981185913
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,2,128,1,float16,fp8,63,0.06589279770851135
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,2,128,1,float16,float16,127,0.07230240106582642
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,2,128,1,float16,fp8,127,0.07191680073738098
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,2,128,1,float16,float16,255,0.0957647979259491
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,4,128,1,float16,fp8,3,0.10021920204162597
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,2,128,1,float16,float16,511,0.13825440406799316
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,2,128,1,float16,fp8,511,0.11844480037689209
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,4,128,1,float16,float16,1,0.08698559999465942
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,4,128,1,float16,fp8,1,0.09936479926109314
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,4,128,1,float16,float16,3,0.0870032012462616
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,4,128,1,float16,float16,7,0.08780639767646789
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,4,128,1,float16,fp8,7,0.10028640031814576
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,4,128,1,float16,float16,15,0.08740800023078918
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,4,128,1,float16,fp8,15,0.0997983992099762
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,4,128,1,float16,float16,31,0.08738880157470703
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,4,128,1,float16,fp8,31,0.09947519898414611
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,4,128,1,float16,float16,63,0.09688320159912109
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,4,128,1,float16,fp8,63,0.1080623984336853
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,4,128,1,float16,float16,127,0.1035599946975708
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,4,128,1,float16,fp8,127,0.11421760320663452
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,4,128,1,float16,float16,255,0.14456000328063964
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,4,128,1,float16,fp8,255,0.14030719995498658
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,8,128,1,float16,float16,3,0.14126559495925903
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,4,128,1,float16,float16,511,0.22957279682159423
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,8,128,1,float16,float16,1,0.14143199920654298
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,8,128,1,float16,fp8,7,0.168668794631958
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,4,128,1,float16,fp8,511,0.20023200511932374
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,8,128,1,float16,fp8,1,0.16870239973068238
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,8,128,1,float16,fp8,3,0.1674896001815796
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,8,128,1,float16,float16,7,0.1417840003967285
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,8,128,1,float16,float16,15,0.14072959423065184
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,8,128,1,float16,fp8,15,0.16840319633483886
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,8,128,1,float16,float16,31,0.15270400047302246
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,8,128,1,float16,fp8,31,0.169486403465271
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,8,128,1,float16,float16,63,0.155731201171875
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,8,128,1,float16,fp8,63,0.18210079669952392
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,8,128,1,float16,float16,127,0.16640160083770753
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,8,128,1,float16,fp8,127,0.19265760183334352
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,8,128,1,float16,float16,255,0.24533441066741943
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,8,128,1,float16,fp8,255,0.24144959449768066
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,1,128,1,float16,float16,1,0.020532800257205962
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,1,128,1,float16,fp8,1,0.02157920002937317
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,8,128,1,float16,float16,511,0.43569121360778806
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,1,128,1,float16,float16,3,0.020078399777412416
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,32,8,128,1,float16,fp8,511,0.36220479011535645
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,1,128,1,float16,fp8,3,0.021110400557518005
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,1,128,1,float16,float16,7,0.020268799364566804
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,1,128,1,float16,fp8,7,0.021451200544834136
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,1,128,1,float16,float16,15,0.020287999510765077
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,1,128,1,float16,float16,31,0.02032800018787384
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,1,128,1,float16,fp8,15,0.02133280038833618
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,1,128,1,float16,fp8,31,0.021091200411319733
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,1,128,1,float16,float16,63,0.02035519927740097
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,1,128,1,float16,fp8,63,0.021057599782943727
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,1,128,1,float16,float16,127,0.02067359983921051
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,1,128,1,float16,fp8,127,0.021012799441814424
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,1,128,1,float16,float16,255,0.0217616006731987
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,1,128,1,float16,fp8,255,0.02327840030193329
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,1,128,1,float16,float16,511,0.025276800990104674
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,1,128,1,float16,fp8,511,0.026876801252365114
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,1,128,1,float16,float16,1023,0.024977600574493407
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,1,128,1,float16,fp8,1023,0.026208001375198364
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,1,128,1,float16,float16,2047,0.02542240023612976
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,1,128,1,float16,fp8,2047,0.02646079957485199
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,1,128,1,float16,float16,4095,0.02717120051383972
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,1,128,1,float16,fp8,4095,0.02763360142707825
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,1,128,1,float16,float16,8191,0.032764801383018495
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,1,128,1,float16,fp8,8191,0.031651198863983154
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,2,128,1,float16,float16,1,0.02025440037250519
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,2,128,1,float16,fp8,1,0.021195200085639954
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,2,128,1,float16,float16,3,0.020587199926376344
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,2,128,1,float16,fp8,3,0.021076799929142
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,2,128,1,float16,float16,7,0.02022880017757416
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,2,128,1,float16,fp8,7,0.021396799385547637
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,2,128,1,float16,float16,15,0.02053920030593872
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,2,128,1,float16,fp8,15,0.02125120013952255
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,2,128,1,float16,float16,31,0.020427200198173522
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,2,128,1,float16,fp8,31,0.02117599993944168
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,2,128,1,float16,float16,63,0.020608000457286835
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,2,128,1,float16,fp8,63,0.02099519968032837
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,2,128,1,float16,float16,127,0.020233599841594695
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,2,128,1,float16,fp8,127,0.021456000208854676
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,2,128,1,float16,float16,255,0.02190079987049103
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,2,128,1,float16,fp8,255,0.023313599824905395
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,2,128,1,float16,float16,511,0.0251008003950119
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,2,128,1,float16,fp8,511,0.027116799354553224
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,2,128,1,float16,float16,1023,0.025336000323295593
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,2,128,1,float16,fp8,1023,0.026214399933815004
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,2,128,1,float16,float16,2047,0.02688319981098175
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,2,128,1,float16,fp8,2047,0.027024000883102417
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,2,128,1,float16,float16,4095,0.031089600920677186
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,2,128,1,float16,fp8,4095,0.030929601192474364
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,2,128,1,float16,float16,8191,0.049860799312591554
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,2,128,1,float16,fp8,8191,0.04074879884719849
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,4,128,1,float16,float16,1,0.020721599459648132
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,4,128,1,float16,fp8,1,0.021270400285720824
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,4,128,1,float16,float16,3,0.02044160068035126
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,4,128,1,float16,fp8,3,0.02134400010108948
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,4,128,1,float16,float16,7,0.02065120041370392
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,4,128,1,float16,fp8,7,0.021433599293231964
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,4,128,1,float16,float16,15,0.020638400316238405
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,4,128,1,float16,fp8,15,0.021425600349903106
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,4,128,1,float16,float16,31,0.020739200711250304
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,4,128,1,float16,fp8,31,0.02133280038833618
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,4,128,1,float16,float16,63,0.020454399287700653
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,4,128,1,float16,fp8,63,0.021320000290870667
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,4,128,1,float16,float16,127,0.020633600652217865
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,4,128,1,float16,fp8,127,0.02139039933681488
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,4,128,1,float16,float16,255,0.022166399657726286
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,4,128,1,float16,fp8,255,0.02339999973773956
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,4,128,1,float16,float16,511,0.0253248006105423
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,4,128,1,float16,fp8,511,0.02694559991359711
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,4,128,1,float16,float16,1023,0.02651199996471405
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,4,128,1,float16,fp8,1023,0.027163198590278624
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,4,128,1,float16,float16,2047,0.031067198514938353
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,4,128,1,float16,fp8,2047,0.030454400181770324
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,4,128,1,float16,float16,4095,0.04925599992275238
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,4,128,1,float16,fp8,4095,0.0370959997177124
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,4,128,1,float16,float16,8191,0.07128319740295411
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,4,128,1,float16,fp8,8191,0.05628479719161987
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,8,128,1,float16,float16,1,0.02074880003929138
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,8,128,1,float16,fp8,1,0.021585600078105928
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,8,128,1,float16,float16,3,0.020846399664878845
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,8,128,1,float16,fp8,3,0.021401600539684297
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,8,128,1,float16,float16,7,0.020751999318599702
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,8,128,1,float16,fp8,7,0.02152799963951111
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,8,128,1,float16,float16,15,0.020820799469947814
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,8,128,1,float16,fp8,15,0.021486400067806243
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,8,128,1,float16,float16,31,0.020635199546813966
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,8,128,1,float16,fp8,31,0.02147520035505295
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,8,128,1,float16,float16,63,0.020764799416065217
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,8,128,1,float16,fp8,63,0.021521599590778352
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,8,128,1,float16,float16,127,0.020777599513530733
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,8,128,1,float16,fp8,127,0.021692800521850585
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,8,128,1,float16,float16,255,0.02245440036058426
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,8,128,1,float16,fp8,255,0.023580799996852874
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,8,128,1,float16,float16,511,0.025729599595069885
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,8,128,1,float16,fp8,511,0.027580800652503967
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,8,128,1,float16,float16,1023,0.030979201197624207
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,8,128,1,float16,fp8,1023,0.030484798550605773
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,8,128,1,float16,float16,2047,0.048871999979019164
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,8,128,1,float16,fp8,2047,0.039575999975204466
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,8,128,1,float16,float16,4095,0.07067840099334717
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,8,128,1,float16,fp8,4095,0.05790240168571472
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,8,128,1,float16,float16,8191,0.11437760591506958
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,32,8,128,1,float16,fp8,8191,0.08081759810447693
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,1,128,1,float16,float16,1,0.0664255976676941
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,1,128,1,float16,fp8,1,0.07204800248146057
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,1,128,1,float16,float16,3,0.06613600254058838
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,1,128,1,float16,fp8,3,0.07262560129165649
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,1,128,1,float16,float16,7,0.06657440066337586
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,1,128,1,float16,fp8,7,0.07258880138397217
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,1,128,1,float16,fp8,63,0.075382399559021
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,1,128,1,float16,float16,15,0.06646080017089843
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,1,128,1,float16,fp8,15,0.07227680087089539
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,1,128,1,float16,float16,31,0.06691520214080811
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,1,128,1,float16,fp8,31,0.07229599952697754
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,1,128,1,float16,float16,63,0.0757856011390686
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,1,128,1,float16,float16,127,0.08051199913024902
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,1,128,1,float16,fp8,127,0.08236799836158752
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,1,128,1,float16,float16,255,0.10268640518188477
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,1,128,1,float16,fp8,255,0.09611679911613465
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,2,128,1,float16,float16,1,0.09403679966926574
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,2,128,1,float16,fp8,1,0.10665760040283204
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,2,128,1,float16,float16,3,0.09400479793548584
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,2,128,1,float16,fp8,3,0.10608320236206055
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,2,128,1,float16,float16,7,0.09452160000801087
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,2,128,1,float16,float16,63,0.1048975944519043
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,2,128,1,float16,fp8,7,0.1062127947807312
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,2,128,1,float16,float16,15,0.09534080028533935
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,2,128,1,float16,fp8,15,0.10661439895629883
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,2,128,1,float16,float16,31,0.09890400171279908
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,2,128,1,float16,fp8,31,0.10676000118255616
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,2,128,1,float16,fp8,63,0.11576800346374512
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,2,128,1,float16,float16,127,0.11269760131835938
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,4,128,1,float16,fp8,1,0.17420799732208253
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,2,128,1,float16,fp8,127,0.11961760520935058
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,2,128,1,float16,float16,255,0.15352799892425537
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,2,128,1,float16,fp8,255,0.1468559980392456
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,4,128,1,float16,float16,1,0.15099359750747682
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,4,128,1,float16,float16,3,0.15055520534515382
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,4,128,1,float16,fp8,3,0.17462559938430786
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,4,128,1,float16,float16,7,0.15193599462509155
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,4,128,1,float16,fp8,7,0.1740383982658386
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,4,128,1,float16,float16,15,0.15197919607162474
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,4,128,1,float16,fp8,15,0.1750480055809021
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,4,128,1,float16,float16,31,0.16037919521331787
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,4,128,1,float16,fp8,31,0.18241920471191406
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,4,128,1,float16,float16,63,0.1627279996871948
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,4,128,1,float16,fp8,63,0.18949919939041138
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,4,128,1,float16,float16,127,0.17271840572357178
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,8,128,1,float16,fp8,1,0.30976479053497313
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,4,128,1,float16,fp8,127,0.19428960084915162
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,4,128,1,float16,float16,255,0.2444256067276001
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,4,128,1,float16,fp8,255,0.24696478843688965
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,8,128,1,float16,float16,1,0.2608799934387207
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,8,128,1,float16,float16,3,0.2606415987014771
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,8,128,1,float16,fp8,3,0.3110304117202759
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,8,128,1,float16,float16,7,0.2644815921783447
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,8,128,1,float16,fp8,7,0.3100032091140747
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,8,128,1,float16,float16,15,0.2760623931884766
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,8,128,1,float16,fp8,15,0.3113840103149414
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,8,128,1,float16,float16,31,0.27999680042266845
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,8,128,1,float16,fp8,31,0.33966240882873533
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,8,128,1,float16,float16,63,0.2831104040145874
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,8,128,1,float16,fp8,63,0.3383375883102417
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,8,128,1,float16,float16,255,0.44834561347961427
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,8,128,1,float16,float16,127,0.3061984062194824
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,8,128,1,float16,fp8,127,0.3559999942779541
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,1,128,1,float16,float16,1,0.10926719903945922
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,32,8,128,1,float16,fp8,255,0.4459407806396484
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,1,128,1,float16,fp8,1,0.1193552017211914
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,1,128,1,float16,float16,3,0.11011359691619874
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,1,128,1,float16,fp8,3,0.1205183982849121
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,1,128,1,float16,float16,7,0.11229599714279175
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,1,128,1,float16,fp8,7,0.1208575963973999
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,1,128,1,float16,float16,15,0.11198240518569946
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,1,128,1,float16,fp8,15,0.1210271954536438
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,1,128,1,float16,float16,127,0.12704479694366455
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,1,128,1,float16,float16,31,0.11715840101242066
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,1,128,1,float16,fp8,127,0.13325920104980468
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,1,128,1,float16,fp8,31,0.12435040473937989
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,1,128,1,float16,float16,63,0.12042720317840576
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,1,128,1,float16,fp8,63,0.1307423949241638
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,2,128,1,float16,float16,1,0.16569919586181642
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,2,128,1,float16,fp8,1,0.18732800483703613
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,2,128,1,float16,float16,3,0.16788959503173828
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,2,128,1,float16,fp8,3,0.18452320098876954
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,2,128,1,float16,float16,7,0.16857919692993165
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,2,128,1,float16,fp8,7,0.18584959506988524
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,2,128,1,float16,float16,15,0.16945120096206664
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,2,128,1,float16,fp8,15,0.18842400312423707
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,2,128,1,float16,float16,31,0.1711967945098877
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,2,128,1,float16,float16,127,0.18838720321655272
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,2,128,1,float16,fp8,31,0.19885920286178588
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,2,128,1,float16,float16,63,0.1755295991897583
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,2,128,1,float16,fp8,63,0.20138719081878662
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,4,128,1,float16,fp8,1,0.32520160675048826
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,2,128,1,float16,fp8,127,0.20426080226898194
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,4,128,1,float16,fp8,3,0.32736959457397463
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,4,128,1,float16,float16,7,0.28375680446624757
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,4,128,1,float16,float16,1,0.28230240345001223
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,4,128,1,float16,float16,3,0.28320000171661375
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,4,128,1,float16,fp8,7,0.33118720054626466
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,4,128,1,float16,float16,15,0.28567678928375245
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,4,128,1,float16,fp8,15,0.33615679740905763
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,4,128,1,float16,float16,31,0.29112160205841064
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,4,128,1,float16,fp8,31,0.3419680118560791
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,4,128,1,float16,float16,63,0.2923295974731445
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,4,128,1,float16,fp8,63,0.34702880382537843
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,4,128,1,float16,float16,127,0.31360640525817873
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,4,128,1,float16,fp8,127,0.3548671960830688
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,8,128,1,float16,float16,1,0.5165296077728272
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,8,128,1,float16,fp8,1,0.5999855995178223
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,8,128,1,float16,float16,3,0.5191504001617432
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,8,128,1,float16,fp8,3,0.6006432056427002
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,8,128,1,float16,float16,7,0.5153007984161377
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,8,128,1,float16,fp8,7,0.6138463973999023
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,8,128,1,float16,float16,15,0.5228032112121582
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,8,128,1,float16,fp8,15,0.6317840099334717
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,8,128,1,float16,float16,31,0.522052812576294
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,8,128,1,float16,fp8,31,0.632528018951416
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,8,128,1,float16,float16,63,0.5300928115844726
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,1,128,1,float16,float16,1,0.020550400018692017
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,1,128,1,float16,fp8,1,0.021238400042057036
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,8,128,1,float16,fp8,63,0.6325407981872558
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,1,128,1,float16,float16,3,0.02012320011854172
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,8,128,1,float16,float16,127,0.5645823955535889
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,1,128,1,float16,fp8,3,0.020852799713611602
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,1,128,1,float16,float16,7,0.020292800664901734
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,32,8,128,1,float16,fp8,127,0.6474575996398926
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,1,128,1,float16,fp8,7,0.02144960016012192
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,1,128,1,float16,float16,15,0.020265600085258482
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,1,128,1,float16,fp8,127,0.021267199516296388
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,1,128,1,float16,fp8,15,0.02101919949054718
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,1,128,1,float16,float16,31,0.020395199954509734
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,1,128,1,float16,fp8,31,0.020828799903392793
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,1,128,1,float16,float16,63,0.020136000216007234
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,1,128,1,float16,fp8,63,0.021328000724315642
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,1,128,1,float16,float16,127,0.020396800339221956
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,1,128,1,float16,float16,255,0.021966400742530822
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,1,128,1,float16,fp8,255,0.023446400463581086
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,1,128,1,float16,float16,511,0.024926400184631346
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,1,128,1,float16,fp8,511,0.026643198728561402
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,1,128,1,float16,float16,1023,0.024849599599838255
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,1,128,1,float16,fp8,1023,0.025942400097846985
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,1,128,1,float16,float16,2047,0.026875200867652892
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,1,128,1,float16,fp8,2047,0.026931199431419372
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,1,128,1,float16,float16,4095,0.03224639892578125
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,1,128,1,float16,fp8,4095,0.031091201305389404
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,1,128,1,float16,float16,8191,0.05003359913825989
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,1,128,1,float16,fp8,8191,0.04086079895496368
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,2,128,1,float16,float16,1,0.020547200739383698
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,2,128,1,float16,fp8,1,0.02154400050640106
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,2,128,1,float16,float16,3,0.020766399800777435
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,2,128,1,float16,fp8,3,0.021272000670433045
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,2,128,1,float16,float16,7,0.020636799931526183
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,2,128,1,float16,fp8,7,0.021590399742126464
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,2,128,1,float16,float16,15,0.02045599967241287
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,2,128,1,float16,fp8,15,0.021614399552345277
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,2,128,1,float16,float16,31,0.020531199872493744
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,2,128,1,float16,fp8,255,0.023342399299144743
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,2,128,1,float16,fp8,31,0.021478399634361267
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,2,128,1,float16,float16,511,0.02547999918460846
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,2,128,1,float16,float16,63,0.020644800364971162
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,2,128,1,float16,fp8,63,0.021331200003623964
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,2,128,1,float16,float16,127,0.021184000372886657
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,2,128,1,float16,fp8,127,0.021404799818992615
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,2,128,1,float16,float16,255,0.02250880002975464
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,2,128,1,float16,fp8,511,0.02696320116519928
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,2,128,1,float16,float16,1023,0.026643198728561402
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,2,128,1,float16,fp8,1023,0.02707839906215668
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,2,128,1,float16,float16,2047,0.030487999320030212
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,2,128,1,float16,fp8,2047,0.030590400099754333
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,2,128,1,float16,float16,4095,0.0498879998922348
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,2,128,1,float16,fp8,4095,0.04079039990901947
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,2,128,1,float16,float16,8191,0.07153279781341552
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,2,128,1,float16,fp8,8191,0.056251198053359985
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,4,128,1,float16,float16,1,0.020880000293254854
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,4,128,1,float16,fp8,1,0.02160799950361252
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,4,128,1,float16,float16,3,0.020764799416065217
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,4,128,1,float16,fp8,3,0.02192160040140152
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,4,128,1,float16,float16,7,0.020868800580501556
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,4,128,1,float16,fp8,7,0.02167679965496063
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,4,128,1,float16,float16,15,0.0208639994263649
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,4,128,1,float16,fp8,15,0.02171040028333664
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,4,128,1,float16,float16,31,0.020844799280166627
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,4,128,1,float16,fp8,31,0.02184640020132065
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,4,128,1,float16,float16,63,0.020788800716400147
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,4,128,1,float16,fp8,63,0.021454399824142455
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,4,128,1,float16,float16,127,0.02081120014190674
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,4,128,1,float16,fp8,127,0.021619200706481934
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,4,128,1,float16,float16,255,0.022812800109386445
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,4,128,1,float16,fp8,255,0.0237744003534317
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,4,128,1,float16,float16,511,0.02566719949245453
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,4,128,1,float16,fp8,511,0.027619200944900512
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,4,128,1,float16,float16,4095,0.07175679802894593
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,4,128,1,float16,float16,1023,0.03200959861278534
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,4,128,1,float16,fp8,1023,0.03031519949436188
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,4,128,1,float16,float16,2047,0.049726399779319766
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,4,128,1,float16,fp8,2047,0.0363072007894516
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,4,128,1,float16,fp8,4095,0.056944000720977786
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,4,128,1,float16,float16,8191,0.1155743956565857
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,4,128,1,float16,fp8,8191,0.0824015974998474
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,8,128,1,float16,float16,1,0.02110079973936081
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,8,128,1,float16,fp8,1,0.021958400309085847
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,8,128,1,float16,float16,3,0.02096959948539734
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,8,128,1,float16,fp8,3,0.02200320065021515
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,8,128,1,float16,float16,7,0.021078400313854218
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,8,128,1,float16,fp8,7,0.022224000096321105
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,8,128,1,float16,float16,15,0.021084800362586975
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,8,128,1,float16,fp8,15,0.02202720046043396
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,8,128,1,float16,float16,31,0.02109439969062805
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,8,128,1,float16,fp8,31,0.021823999285697938
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,8,128,1,float16,float16,63,0.02112800031900406
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,8,128,1,float16,fp8,63,0.02211360037326813
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,8,128,1,float16,float16,127,0.0213919997215271
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,8,128,1,float16,fp8,127,0.022129599750041962
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,8,128,1,float16,float16,255,0.023196800053119658
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,8,128,1,float16,fp8,255,0.024225600063800812
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,8,128,1,float16,float16,511,0.027883198857307435
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,8,128,1,float16,float16,4095,0.11321120262145996
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,8,128,1,float16,fp8,511,0.028059199452400208
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,8,128,1,float16,float16,1023,0.04858720004558563
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,8,128,1,float16,fp8,1023,0.036822399497032164
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,8,128,1,float16,float16,2047,0.06975679993629455
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,8,128,1,float16,fp8,2047,0.056715202331542966
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,8,128,1,float16,fp8,4095,0.0808143973350525
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,8,128,1,float16,fp8,8191,0.13254400491714477
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,1,128,1,float16,float16,1,0.021984000504016877
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,32,8,128,1,float16,float16,8191,0.19955999851226808
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,1,128,1,float16,fp8,1,0.022652800381183624
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,1,128,1,float16,float16,3,0.022009600698947907
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,1,128,1,float16,fp8,3,0.022628800570964815
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,1,128,1,float16,float16,7,0.02187040001153946
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,1,128,1,float16,fp8,7,0.022888000309467315
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,1,128,1,float16,float16,15,0.02200479954481125
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,1,128,1,float16,fp8,15,0.02282879948616028
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,1,128,1,float16,float16,31,0.022126400470733644
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,1,128,1,float16,fp8,31,0.022756800055503845
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,1,128,1,float16,float16,63,0.021937599778175353
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,1,128,1,float16,fp8,63,0.02303680032491684
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,1,128,1,float16,float16,127,0.021854400634765625
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,1,128,1,float16,fp8,127,0.0230320006608963
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,1,128,1,float16,float16,255,0.023956799507141115
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,1,128,1,float16,fp8,255,0.024991999566555022
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,1,128,1,float16,float16,511,0.02717280089855194
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,1,128,1,float16,fp8,511,0.028575998544692994
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,1,128,1,float16,float16,1023,0.031249600648880004
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,1,128,1,float16,fp8,1023,0.031350401043891904
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,1,128,1,float16,float16,2047,0.05132160186767578
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,1,128,1,float16,fp8,2047,0.03954559862613678
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,1,128,1,float16,float16,4095,0.07273439764976501
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,1,128,1,float16,fp8,4095,0.057948797941207886
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,1,128,1,float16,float16,8191,0.11704319715499878
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,1,128,1,float16,fp8,8191,0.08338239789009094
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,2,128,1,float16,float16,1,0.022203199565410614
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,1,128,1,float16,float16,16383,0.20425760746002197
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,1,128,1,float16,fp8,16383,0.13590079545974731
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,2,128,1,float16,fp8,1,0.023086400330066682
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,2,128,1,float16,float16,3,0.02221599966287613
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,2,128,1,float16,fp8,3,0.023039999604225158
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,2,128,1,float16,float16,7,0.0223471999168396
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,2,128,1,float16,fp8,7,0.023151999711990355
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,2,128,1,float16,float16,15,0.02210720032453537
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,2,128,1,float16,fp8,15,0.023102399706840516
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,2,128,1,float16,float16,31,0.02223840057849884
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,2,128,1,float16,fp8,31,0.023175999522209167
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,2,128,1,float16,float16,63,0.02231999933719635
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,2,128,1,float16,fp8,63,0.023145599663257597
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,2,128,1,float16,float16,127,0.022363199293613432
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,2,128,1,float16,fp8,127,0.023240000009536743
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,2,128,1,float16,float16,255,0.02412479966878891
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,2,128,1,float16,fp8,255,0.02498079985380173
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,2,128,1,float16,float16,2047,0.0715936005115509
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,2,128,1,float16,float16,511,0.027934399247169495
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,2,128,1,float16,fp8,511,0.028964799642562867
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,2,128,1,float16,float16,4095,0.11664960384368897
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,2,128,1,float16,float16,1023,0.04996640086174011
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,2,128,1,float16,fp8,1023,0.03896960020065308
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,2,128,1,float16,fp8,2047,0.057089602947235106
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,2,128,1,float16,fp8,4095,0.08208799958229065
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,2,128,1,float16,fp8,8191,0.13351520299911498
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,2,128,1,float16,float16,8191,0.20182080268859864
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,4,128,1,float16,float16,1,0.026078400015830994
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,2,128,1,float16,float16,16383,0.37620959281921384
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,4,128,1,float16,fp8,1,0.027492800354957582
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,2,128,1,float16,fp8,16383,0.23362879753112792
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,4,128,1,float16,float16,3,0.026128000020980834
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,4,128,1,float16,fp8,3,0.027595201134681703
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,4,128,1,float16,float16,7,0.026182401180267333
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,4,128,1,float16,fp8,7,0.02768000066280365
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,4,128,1,float16,float16,15,0.02605920135974884
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,4,128,1,float16,fp8,15,0.02743520140647888
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,4,128,1,float16,float16,31,0.026020801067352294
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,4,128,1,float16,fp8,31,0.027617600560188294
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,4,128,1,float16,float16,63,0.026153600215911864
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,4,128,1,float16,fp8,63,0.027780801057815552
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,4,128,1,float16,float16,127,0.026131200790405273
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,4,128,1,float16,fp8,127,0.027636799216270446
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,4,128,1,float16,float16,255,0.0305184006690979
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,4,128,1,float16,fp8,255,0.03174560070037842
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,4,128,1,float16,float16,511,0.049446401000022885
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,4,128,1,float16,fp8,511,0.04274879992008209
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,4,128,1,float16,float16,1023,0.07498559951782227
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,4,128,1,float16,fp8,1023,0.061624002456665036
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,4,128,1,float16,float16,2047,0.11851999759674073
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,4,128,1,float16,fp8,2047,0.08602560162544251
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,4,128,1,float16,float16,4095,0.20327360630035402
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,4,128,1,float16,fp8,4095,0.13673440217971802
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,4,128,1,float16,float16,8191,0.3731872081756592
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,4,128,1,float16,fp8,8191,0.23629279136657716
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,8,128,1,float16,float16,1,0.033606401085853575
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,8,128,1,float16,fp8,1,0.03655839860439301
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,8,128,1,float16,float16,3,0.03322399854660034
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,8,128,1,float16,fp8,3,0.03663839995861053
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,8,128,1,float16,float16,7,0.03334720134735107
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,4,128,1,float16,fp8,16383,0.43558239936828613
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,4,128,1,float16,float16,16383,0.7169023990631104
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,8,128,1,float16,float16,15,0.03372479975223541
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,8,128,1,float16,fp8,7,0.03682560026645661
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,8,128,1,float16,fp8,15,0.036550399661064145
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,8,128,1,float16,float16,31,0.03365600109100342
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,8,128,1,float16,fp8,31,0.036620798707008365
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,8,128,1,float16,float16,63,0.033371201157569884
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,8,128,1,float16,fp8,63,0.03666560053825378
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,8,128,1,float16,float16,127,0.0369488000869751
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,8,128,1,float16,fp8,127,0.03650560081005096
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,8,128,1,float16,float16,255,0.054497599601745605
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,8,128,1,float16,fp8,255,0.04779199957847595
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,8,128,1,float16,float16,511,0.07550560235977173
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,8,128,1,float16,fp8,511,0.06707360148429871
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,8,128,1,float16,float16,1023,0.12213120460510254
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,8,128,1,float16,fp8,1023,0.09444479942321778
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,8,128,1,float16,float16,2047,0.2063040018081665
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,8,128,1,float16,fp8,2047,0.14325920343399048
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,8,128,1,float16,float16,4095,0.3755951881408691
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,8,128,1,float16,fp8,4095,0.24368159770965575
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,1,128,1,float16,float16,1,0.01759680062532425
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,1,128,1,float16,fp8,1,0.018324799835681915
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,1,128,1,float16,float16,3,0.01738079935312271
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,8,128,1,float16,fp8,8191,0.44303040504455565
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,1,128,1,float16,fp8,3,0.018251200020313264
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,1,128,1,float16,float16,7,0.017449599504470826
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,8,128,1,float16,float16,8191,0.7160863876342773
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,1,128,1,float16,fp8,7,0.01839679926633835
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,1,128,1,float16,float16,15,0.01730400025844574
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,1,128,1,float16,fp8,15,0.018620799481868743
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,1,128,1,float16,float16,31,0.017347200214862822
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,1,128,1,float16,fp8,31,0.018423999845981597
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,8,128,1,float16,fp8,16383,0.8415040016174317
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,1,128,1,float16,fp8,63,0.018454399704933167
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,1,128,1,float16,float16,63,0.01743520051240921
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,16,8,128,1,float16,float16,16383,1.4011167526245116
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,1,128,1,float16,float16,127,0.01736319959163666
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,1,128,1,float16,fp8,127,0.01849920004606247
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,1,128,1,float16,float16,255,0.0190080001950264
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,1,128,1,float16,fp8,255,0.020318399369716644
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,1,128,1,float16,float16,4095,0.022462399303913118
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,1,128,1,float16,fp8,511,0.02441119998693466
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,1,128,1,float16,float16,511,0.02187040001153946
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,1,128,1,float16,float16,1023,0.02178560048341751
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,1,128,1,float16,fp8,1023,0.024822400510311128
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,1,128,1,float16,float16,2047,0.02205760031938553
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,1,128,1,float16,fp8,2047,0.023104000091552734
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,1,128,1,float16,fp8,4095,0.023764799535274505
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,1,128,1,float16,float16,8191,0.023904000222682954
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,1,128,1,float16,fp8,8191,0.0249439999461174
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,1,128,1,float16,float16,16383,0.025915199518203737
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,1,128,1,float16,fp8,16383,0.02744159996509552
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,2,128,1,float16,float16,1,0.018508799374103546
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,2,128,1,float16,fp8,1,0.01900160014629364
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,2,128,1,float16,float16,3,0.018164800107479097
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,2,128,1,float16,float16,31,0.018279999494552612
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,2,128,1,float16,fp8,3,0.019017599523067474
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,2,128,1,float16,float16,7,0.018113599717617036
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,2,128,1,float16,fp8,7,0.01908160001039505
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,2,128,1,float16,float16,15,0.018087999522686006
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,2,128,1,float16,fp8,15,0.019215999543666838
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,2,128,1,float16,fp8,31,0.01932159960269928
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,2,128,1,float16,float16,63,0.01835840046405792
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,2,128,1,float16,fp8,63,0.01900479942560196
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,2,128,1,float16,float16,127,0.018219199776649476
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,2,128,1,float16,fp8,127,0.019215999543666838
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,2,128,1,float16,float16,255,0.019819200038909912
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,2,128,1,float16,fp8,255,0.021087999641895293
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,2,128,1,float16,float16,511,0.02298080027103424
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,2,128,1,float16,fp8,511,0.025051200389862062
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,2,128,1,float16,float16,1023,0.022547200322151184
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,2,128,1,float16,fp8,1023,0.023643200099468232
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,2,128,1,float16,float16,2047,0.022569599747657775
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,2,128,1,float16,fp8,2047,0.02402079999446869
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,2,128,1,float16,float16,4095,0.023710399866104126
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,2,128,1,float16,fp8,4095,0.024883200228214265
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,2,128,1,float16,float16,8191,0.025231999158859254
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,2,128,1,float16,fp8,8191,0.026022401452064515
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,2,128,1,float16,float16,16383,0.02747200131416321
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,2,128,1,float16,fp8,16383,0.0286655992269516
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,4,128,1,float16,float16,1,0.019808000326156615
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,4,128,1,float16,fp8,1,0.020553599298000335
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,4,128,1,float16,float16,3,0.020068800449371337
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,4,128,1,float16,fp8,3,0.02088800072669983
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,4,128,1,float16,float16,7,0.01977279931306839
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,4,128,1,float16,fp8,7,0.020822399854660036
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,4,128,1,float16,float16,15,0.019972799718379973
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,4,128,1,float16,fp8,15,0.02072319984436035
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,4,128,1,float16,float16,31,0.0197952002286911
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,4,128,1,float16,fp8,31,0.020735999941825865
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,4,128,1,float16,float16,63,0.019945600628852846
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,4,128,1,float16,fp8,63,0.02075359970331192
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,4,128,1,float16,float16,127,0.01984640061855316
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,4,128,1,float16,fp8,127,0.020843200385570526
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,4,128,1,float16,float16,255,0.021352000534534454
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,4,128,1,float16,fp8,255,0.02287680059671402
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,4,128,1,float16,float16,511,0.024777600169181825
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,4,128,1,float16,fp8,511,0.026604801416397095
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,4,128,1,float16,float16,1023,0.02396000027656555
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,4,128,1,float16,fp8,1023,0.025553598999977112
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,4,128,1,float16,fp8,8191,0.028145599365234374
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,4,128,1,float16,float16,2047,0.024542400240898134
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,4,128,1,float16,fp8,2047,0.02587360143661499
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,4,128,1,float16,float16,4095,0.025868800282478333
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,4,128,1,float16,float16,8191,0.027735999226570128
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,4,128,1,float16,fp8,4095,0.02647840082645416
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,4,128,1,float16,float16,16383,0.031379199028015135
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,4,128,1,float16,fp8,16383,0.03215200006961823
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,8,128,1,float16,float16,1,0.020150400698184967
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,8,128,1,float16,fp8,1,0.020737600326538087
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,8,128,1,float16,float16,3,0.019836799800395967
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,8,128,1,float16,fp8,3,0.020771199464797975
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,8,128,1,float16,float16,7,0.020100800693035124
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,8,128,1,float16,fp8,7,0.020894399285316466
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,8,128,1,float16,float16,15,0.020019200444221497
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,8,128,1,float16,fp8,15,0.020844799280166627
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,8,128,1,float16,float16,31,0.020108799636363982
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,8,128,1,float16,fp8,31,0.02081120014190674
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,8,128,1,float16,float16,63,0.01989919990301132
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,8,128,1,float16,fp8,63,0.020790399610996248
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,8,128,1,float16,float16,127,0.020147199928760528
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,8,128,1,float16,fp8,127,0.02099519968032837
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,8,128,1,float16,float16,255,0.021588799357414246
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,8,128,1,float16,fp8,255,0.022758400440216063
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,8,128,1,float16,float16,511,0.024803200364112855
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,8,128,1,float16,fp8,511,0.02648639976978302
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,8,128,1,float16,float16,1023,0.02446240037679672
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,8,128,1,float16,fp8,1023,0.025736001133918763
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,8,128,1,float16,float16,2047,0.02504960000514984
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,8,128,1,float16,fp8,2047,0.026383998990058898
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,8,128,1,float16,float16,4095,0.027060800790786745
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,8,128,1,float16,fp8,4095,0.027140799164772033
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,8,128,1,float16,float16,8191,0.03059839904308319
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,8,128,1,float16,fp8,8191,0.03083840012550354
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,8,128,1,float16,float16,16383,0.04816479980945587
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,16,8,128,1,float16,fp8,16383,0.038315200805664064
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,1,128,1,float16,float16,1,0.018408000469207764
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,1,128,1,float16,fp8,1,0.01915999948978424
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,1,128,1,float16,fp8,15,0.019139200448989868
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,1,128,1,float16,float16,3,0.018190400302410127
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,1,128,1,float16,fp8,3,0.019145600497722626
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,1,128,1,float16,float16,7,0.018211199343204497
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,1,128,1,float16,fp8,7,0.019172799587249757
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,1,128,1,float16,float16,15,0.01828639954328537
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,1,128,1,float16,float16,31,0.018515199422836304
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,1,128,1,float16,fp8,31,0.01926880031824112
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,1,128,1,float16,float16,63,0.018320000171661376
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,1,128,1,float16,fp8,63,0.019166399538517
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,1,128,1,float16,float16,127,0.018139199912548067
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,1,128,1,float16,float16,1023,0.022444799542427063
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,1,128,1,float16,fp8,127,0.01934400051832199
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,1,128,1,float16,float16,255,0.01990240067243576
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,1,128,1,float16,fp8,255,0.021273599565029146
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,1,128,1,float16,float16,511,0.02314079999923706
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,1,128,1,float16,fp8,511,0.025135999917984007
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,1,128,1,float16,fp8,1023,0.023921599984169005
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,1,128,1,float16,float16,2047,0.022852799296379088
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,1,128,1,float16,fp8,2047,0.024320000410079957
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,1,128,1,float16,float16,4095,0.023785600066185
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,1,128,1,float16,fp8,4095,0.024956800043582916
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,1,128,1,float16,float16,8191,0.025220799446105956
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,1,128,1,float16,fp8,8191,0.02643679976463318
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,1,128,1,float16,float16,16383,0.027497598528862
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,1,128,1,float16,fp8,16383,0.02850080132484436
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,2,128,1,float16,float16,1,0.020110400021076204
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,2,128,1,float16,fp8,1,0.02088800072669983
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,2,128,1,float16,float16,3,0.019900800287723543
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,2,128,1,float16,fp8,3,0.02096800059080124
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,2,128,1,float16,float16,7,0.02007199972867966
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,2,128,1,float16,fp8,7,0.020956799387931824
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,2,128,1,float16,float16,15,0.02009280025959015
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,2,128,1,float16,fp8,15,0.0209647998213768
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,2,128,1,float16,float16,31,0.020351999998092653
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,2,128,1,float16,fp8,31,0.020790399610996248
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,2,128,1,float16,float16,63,0.02024960070848465
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,2,128,1,float16,fp8,63,0.0209184005856514
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,2,128,1,float16,float16,127,0.02006399929523468
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,2,128,1,float16,fp8,127,0.020923200249671935
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,2,128,1,float16,float16,255,0.021644799411296843
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,2,128,1,float16,fp8,255,0.023083199560642243
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,2,128,1,float16,float16,511,0.024438400566577912
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,2,128,1,float16,fp8,511,0.026598399877548216
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,2,128,1,float16,float16,1023,0.02417919933795929
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,2,128,1,float16,fp8,1023,0.025748801231384278
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,2,128,1,float16,float16,2047,0.024633599817752837
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,2,128,1,float16,fp8,2047,0.026063999533653258
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,2,128,1,float16,float16,4095,0.02547520101070404
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,2,128,1,float16,fp8,4095,0.026697599887847902
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,2,128,1,float16,float16,8191,0.027744001150131224
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,2,128,1,float16,fp8,8191,0.028336000442504884
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,2,128,1,float16,float16,16383,0.032574400305747986
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,2,128,1,float16,fp8,16383,0.03176159858703613
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,4,128,1,float16,float16,1,0.02007199972867966
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,4,128,1,float16,fp8,1,0.021236799657344818
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,4,128,1,float16,float16,3,0.020235200226306916
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,4,128,1,float16,fp8,3,0.02080959975719452
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,4,128,1,float16,float16,7,0.020160000026226043
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,4,128,1,float16,fp8,7,0.02088640034198761
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,4,128,1,float16,float16,15,0.020158399641513825
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,4,128,1,float16,fp8,15,0.020972800254821778
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,4,128,1,float16,float16,31,0.020153599977493285
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,4,128,1,float16,fp8,31,0.021267199516296388
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,4,128,1,float16,float16,63,0.020150400698184967
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,4,128,1,float16,fp8,63,0.020880000293254854
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,4,128,1,float16,float16,127,0.02032800018787384
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,4,128,1,float16,fp8,127,0.02072640061378479
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,4,128,1,float16,float16,255,0.021891200542449953
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,4,128,1,float16,fp8,255,0.022908799350261688
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,4,128,1,float16,float16,511,0.024659200012683867
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,4,128,1,float16,fp8,511,0.02659359872341156
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,4,128,1,float16,float16,1023,0.02431039959192276
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,4,128,1,float16,fp8,1023,0.026169601082801818
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,4,128,1,float16,float16,2047,0.025148800015449523
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,4,128,1,float16,fp8,2047,0.026499199867248534
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,4,128,1,float16,float16,4095,0.02715040147304535
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,4,128,1,float16,fp8,4095,0.027260801196098326
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,4,128,1,float16,float16,8191,0.030947199463844298
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,4,128,1,float16,fp8,8191,0.031358399987220766
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,4,128,1,float16,float16,16383,0.049486398696899414
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,4,128,1,float16,fp8,16383,0.0384880006313324
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,8,128,1,float16,float16,1,0.0203792005777359
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,8,128,1,float16,fp8,1,0.020793600380420683
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,8,128,1,float16,float16,3,0.020151999592781068
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,8,128,1,float16,fp8,3,0.021009600162506102
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,8,128,1,float16,float16,7,0.020185600221157073
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,8,128,1,float16,fp8,7,0.021236799657344818
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,8,128,1,float16,float16,15,0.02038719952106476
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,8,128,1,float16,fp8,15,0.020924800634384157
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,8,128,1,float16,float16,31,0.02027679979801178
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,8,128,1,float16,fp8,31,0.0208639994263649
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,8,128,1,float16,float16,63,0.020227199792861937
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,8,128,1,float16,fp8,63,0.02096160054206848
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,8,128,1,float16,float16,127,0.020076799392700195
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,8,128,1,float16,fp8,127,0.021191999316215515
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,8,128,1,float16,float16,255,0.021724799275398256
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,8,128,1,float16,fp8,255,0.023124800622463228
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,8,128,1,float16,float16,511,0.025041601061820982
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,8,128,1,float16,fp8,511,0.02656320035457611
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,8,128,1,float16,float16,1023,0.024851199984550477
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,8,128,1,float16,fp8,1023,0.026044800877571106
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,8,128,1,float16,float16,2047,0.02646079957485199
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,8,128,1,float16,fp8,2047,0.02698560059070587
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,8,128,1,float16,float16,4095,0.03197599947452545
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,8,128,1,float16,fp8,4095,0.03056640028953552
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,8,128,1,float16,float16,8191,0.04898079931735992
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,8,128,1,float16,fp8,8191,0.03824479877948761
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,8,128,1,float16,float16,16383,0.07059040069580078
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,16,8,128,1,float16,fp8,16383,0.055764800310134886
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,1,128,1,float16,float16,1,0.021249599754810333
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,1,128,1,float16,fp8,1,0.02194560021162033
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,1,128,1,float16,float16,3,0.0212351992726326
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,1,128,1,float16,fp8,3,0.021617600321769716
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,1,128,1,float16,float16,7,0.021113599836826324
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,1,128,1,float16,fp8,7,0.022177599370479584
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,1,128,1,float16,float16,15,0.02138399928808212
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,1,128,1,float16,fp8,15,0.021924799680709837
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,1,128,1,float16,float16,31,0.02120800018310547
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,1,128,1,float16,fp8,31,0.022023999691009523
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,1,128,1,float16,float16,63,0.02098720073699951
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,1,128,1,float16,fp8,63,0.021956799924373625
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,1,128,1,float16,fp8,511,0.027433601021766663
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,1,128,1,float16,float16,127,0.021118399500846863
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,1,128,1,float16,float16,1023,0.02728480100631714
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,1,128,1,float16,fp8,127,0.022155199944972993
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,1,128,1,float16,float16,2047,0.02901119887828827
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,1,128,1,float16,float16,255,0.02298240065574646
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,1,128,1,float16,fp8,255,0.023999999463558196
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,1,128,1,float16,float16,511,0.026105600595474242
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,1,128,1,float16,fp8,1023,0.027161601185798644
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,1,128,1,float16,fp8,2047,0.031248000264167786
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,1,128,1,float16,float16,4095,0.050329601764678954
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,1,128,1,float16,fp8,4095,0.03935360014438629
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,1,128,1,float16,float16,8191,0.07186239957809448
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,1,128,1,float16,fp8,8191,0.057443201541900635
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,1,128,1,float16,float16,16383,0.11619679927825928
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,1,128,1,float16,fp8,16383,0.08206719756126404
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,2,128,1,float16,float16,1,0.021348799765110015
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,2,128,1,float16,fp8,1,0.022281600534915923
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,2,128,1,float16,float16,3,0.021241599321365358
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,2,128,1,float16,fp8,3,0.022040000557899474
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,2,128,1,float16,float16,7,0.02123199999332428
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,2,128,1,float16,fp8,7,0.021859200298786165
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,2,128,1,float16,float16,15,0.02127680033445358
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,2,128,1,float16,fp8,15,0.022089600563049316
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,2,128,1,float16,float16,31,0.021265600621700288
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,2,128,1,float16,fp8,31,0.02192160040140152
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,2,128,1,float16,float16,63,0.021247999370098115
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,2,128,1,float16,fp8,63,0.022303999960422517
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,2,128,1,float16,float16,127,0.02133280038833618
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,2,128,1,float16,fp8,127,0.022096000611782074
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,2,128,1,float16,float16,255,0.023363199830055238
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,2,128,1,float16,fp8,255,0.02396959960460663
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,2,128,1,float16,float16,511,0.025956800580024718
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,2,128,1,float16,fp8,511,0.02776319980621338
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,2,128,1,float16,float16,1023,0.029902398586273193
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,2,128,1,float16,fp8,1023,0.031201601028442383
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,2,128,1,float16,fp8,8191,0.08240960240364074
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,2,128,1,float16,float16,2047,0.05033439993858337
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,2,128,1,float16,fp8,2047,0.041440001130104064
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,2,128,1,float16,float16,4095,0.0715279996395111
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,2,128,1,float16,fp8,4095,0.05740640163421631
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,2,128,1,float16,float16,8191,0.11557279825210572
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,2,128,1,float16,float16,16383,0.20318880081176757
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,4,128,1,float16,float16,1,0.021507200598716737
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,2,128,1,float16,fp8,16383,0.13379839658737183
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,4,128,1,float16,float16,15,0.021488000452518464
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,4,128,1,float16,fp8,1,0.02239679992198944
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,4,128,1,float16,float16,3,0.02159679979085922
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,4,128,1,float16,fp8,3,0.022407999634742735
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,4,128,1,float16,float16,7,0.021593600511550903
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,4,128,1,float16,fp8,7,0.022265599668025972
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,4,128,1,float16,fp8,15,0.022492800652980805
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,4,128,1,float16,float16,31,0.021587200462818146
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,4,128,1,float16,fp8,31,0.022414399683475493
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,4,128,1,float16,float16,63,0.021622399985790252
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,4,128,1,float16,fp8,63,0.02245440036058426
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,4,128,1,float16,float16,127,0.021747200191020964
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,4,128,1,float16,fp8,127,0.022516800463199614
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,4,128,1,float16,float16,255,0.02341119945049286
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,4,128,1,float16,fp8,255,0.024663999676704407
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,4,128,1,float16,float16,511,0.02688640058040619
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,4,128,1,float16,fp8,511,0.02842240035533905
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,4,128,1,float16,float16,1023,0.04875519871711731
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,4,128,1,float16,fp8,1023,0.03747679889202118
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,4,128,1,float16,float16,2047,0.07080320119857789
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,4,128,1,float16,fp8,2047,0.056383997201919556
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,4,128,1,float16,float16,4095,0.11400320529937744
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,4,128,1,float16,fp8,4095,0.08141440153121948
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,4,128,1,float16,float16,8191,0.20093278884887694
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,4,128,1,float16,fp8,8191,0.13285599946975707
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,8,128,1,float16,float16,1,0.025484800338745117
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,8,128,1,float16,fp8,1,0.027001601457595826
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,4,128,1,float16,float16,16383,0.3731935977935791
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,4,128,1,float16,fp8,16383,0.23105919361114502
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,8,128,1,float16,float16,3,0.025513601303100587
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,8,128,1,float16,fp8,3,0.02677760124206543
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,8,128,1,float16,float16,7,0.02555040121078491
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,8,128,1,float16,fp8,63,0.026851201057434083
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,8,128,1,float16,fp8,7,0.02680320143699646
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,8,128,1,float16,float16,15,0.0255295991897583
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,8,128,1,float16,fp8,15,0.026855999231338502
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,8,128,1,float16,float16,31,0.025433599948883057
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,8,128,1,float16,fp8,31,0.026913601160049438
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,8,128,1,float16,float16,63,0.0255295991897583
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,8,128,1,float16,float16,127,0.025833600759506227
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,8,128,1,float16,float16,1023,0.07328959703445434
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,8,128,1,float16,fp8,127,0.02688480019569397
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,8,128,1,float16,float16,255,0.031009599566459656
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,8,128,1,float16,fp8,255,0.03107360005378723
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,8,128,1,float16,float16,511,0.04782879948616028
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,8,128,1,float16,fp8,511,0.041249600052833554
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,8,128,1,float16,fp8,1023,0.06047520041465759
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,8,128,1,float16,float16,2047,0.11639679670333862
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,8,128,1,float16,fp8,2047,0.0853663980960846
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,8,128,1,float16,float16,4095,0.20039680004119872
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,8,128,1,float16,fp8,4095,0.1371216058731079
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,8,128,1,float16,float16,8191,0.37239038944244385
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,8,128,1,float16,fp8,8191,0.23751039505004884
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,1,128,1,float16,float16,1,0.020206399261951447
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,1,128,1,float16,fp8,1,0.02091519981622696
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,1,128,1,float16,float16,3,0.02028000056743622
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,1,128,1,float16,fp8,3,0.021011200547218323
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,1,128,1,float16,float16,7,0.02024960070848465
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,1,128,1,float16,fp8,7,0.02088800072669983
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,8,128,1,float16,fp8,16383,0.43294081687927244
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,1,128,1,float16,float16,15,0.020216000080108643
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,16,8,128,1,float16,float16,16383,0.7169856071472168
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,1,128,1,float16,fp8,15,0.02115360051393509
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,1,128,1,float16,float16,31,0.02005600035190582
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,1,128,1,float16,fp8,31,0.020960000157356263
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,1,128,1,float16,float16,63,0.020233599841594695
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,1,128,1,float16,fp8,63,0.020766399800777435
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,1,128,1,float16,float16,127,0.0201664000749588
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,1,128,1,float16,float16,255,0.021875199675559998
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,1,128,1,float16,fp8,127,0.021014399826526642
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,1,128,1,float16,fp8,255,0.023057599365711213
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,1,128,1,float16,float16,511,0.024806399643421174
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,1,128,1,float16,fp8,511,0.026609599590301514
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,1,128,1,float16,float16,1023,0.024193599820137024
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,1,128,1,float16,fp8,1023,0.02592960000038147
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,1,128,1,float16,float16,2047,0.02505599856376648
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,1,128,1,float16,fp8,2047,0.026080000400543212
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,1,128,1,float16,float16,4095,0.02585119903087616
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,1,128,1,float16,fp8,4095,0.026795199513435362
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,1,128,1,float16,float16,8191,0.027676799893379213
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,1,128,1,float16,fp8,8191,0.027983999252319335
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,1,128,1,float16,float16,16383,0.03173120021820068
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,1,128,1,float16,fp8,16383,0.03219200074672699
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,2,128,1,float16,float16,1,0.020124800503253937
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,2,128,1,float16,fp8,1,0.021004800498485566
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,2,128,1,float16,float16,3,0.02001120001077652
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,2,128,1,float16,fp8,3,0.02106720060110092
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,2,128,1,float16,float16,7,0.019996799528598785
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,2,128,1,float16,fp8,7,0.02086080014705658
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,2,128,1,float16,float16,15,0.020457600057125092
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,2,128,1,float16,fp8,15,0.021001599729061127
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,2,128,1,float16,float16,31,0.020204800367355346
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,2,128,1,float16,fp8,31,0.020980800688266753
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,2,128,1,float16,float16,63,0.020190399885177613
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,2,128,1,float16,fp8,63,0.02112320065498352
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,2,128,1,float16,float16,127,0.02016319930553436
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,2,128,1,float16,fp8,127,0.020851199328899384
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,2,128,1,float16,float16,255,0.021971200406551362
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,2,128,1,float16,fp8,255,0.022915199398994446
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,2,128,1,float16,float16,511,0.024985599517822265
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,2,128,1,float16,fp8,511,0.027263998985290527
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,2,128,1,float16,float16,1023,0.024540799856185912
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,2,128,1,float16,fp8,1023,0.025857600569725036
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,2,128,1,float16,float16,2047,0.025044798851013184
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,2,128,1,float16,fp8,2047,0.026347199082374574
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,2,128,1,float16,float16,4095,0.027110400795936584
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,2,128,1,float16,fp8,4095,0.027270400524139406
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,2,128,1,float16,float16,8191,0.03158240020275116
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,2,128,1,float16,fp8,8191,0.031632000207901
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,2,128,1,float16,float16,16383,0.05016160011291504
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,2,128,1,float16,fp8,16383,0.03791039884090423
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,4,128,1,float16,float16,1,0.020080000162124634
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,4,128,1,float16,fp8,1,0.020931200683116914
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,4,128,1,float16,float16,3,0.02045920044183731
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,4,128,1,float16,fp8,3,0.020715199410915375
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,4,128,1,float16,float16,7,0.020342400670051573
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,4,128,1,float16,fp8,7,0.02120479941368103
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,4,128,1,float16,float16,15,0.020187200605869295
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,4,128,1,float16,fp8,15,0.02112479954957962
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,4,128,1,float16,float16,31,0.020241600275039674
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,4,128,1,float16,fp8,31,0.02101760059595108
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,4,128,1,float16,float16,63,0.0204815998673439
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,4,128,1,float16,fp8,63,0.02093279957771301
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,4,128,1,float16,float16,127,0.020308800041675568
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,4,128,1,float16,fp8,127,0.021193599700927733
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,4,128,1,float16,float16,255,0.021865600347518922
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,4,128,1,float16,fp8,255,0.02306559979915619
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,4,128,1,float16,float16,511,0.024905599653720856
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,4,128,1,float16,fp8,511,0.026556798815727235
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,4,128,1,float16,float16,1023,0.025044798851013184
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,4,128,1,float16,fp8,1023,0.025987198948860167
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,4,128,1,float16,float16,2047,0.026846399903297423
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,4,128,1,float16,fp8,2047,0.027127999067306518
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,4,128,1,float16,float16,4095,0.029972800612449647
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,4,128,1,float16,fp8,4095,0.030553600192070006
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,4,128,1,float16,float16,8191,0.048670399188995364
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,4,128,1,float16,fp8,8191,0.03681919872760773
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,4,128,1,float16,float16,16383,0.07065439820289612
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,4,128,1,float16,fp8,16383,0.0563215970993042
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,8,128,1,float16,float16,1,0.02050720006227493
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,8,128,1,float16,fp8,1,0.02138399928808212
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,8,128,1,float16,float16,3,0.020284800231456755
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,8,128,1,float16,fp8,3,0.021167999505996703
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,8,128,1,float16,float16,7,0.02045599967241287
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,8,128,1,float16,fp8,7,0.02122880071401596
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,8,128,1,float16,float16,15,0.02059040069580078
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,8,128,1,float16,fp8,15,0.021227200329303742
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,8,128,1,float16,float16,31,0.02050720006227493
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,8,128,1,float16,fp8,31,0.021116800606250763
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,8,128,1,float16,float16,63,0.020444799959659577
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,8,128,1,float16,fp8,63,0.021227200329303742
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,8,128,1,float16,float16,127,0.02027679979801178
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,8,128,1,float16,fp8,127,0.02144320011138916
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,8,128,1,float16,float16,255,0.02216159999370575
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,8,128,1,float16,fp8,255,0.02321600019931793
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,8,128,1,float16,float16,511,0.025591999292373657
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,8,128,1,float16,fp8,511,0.02698560059070587
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,8,128,1,float16,float16,1023,0.026383998990058898
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,8,128,1,float16,fp8,1023,0.026499199867248534
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,8,128,1,float16,float16,2047,0.028832000494003297
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,8,128,1,float16,fp8,2047,0.030395200848579405
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,8,128,1,float16,float16,16383,0.11346880197525025
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,8,128,1,float16,float16,4095,0.04859679937362671
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,8,128,1,float16,fp8,4095,0.0361519992351532
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,8,128,1,float16,float16,8191,0.07030240297317505
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,8,128,1,float16,fp8,8191,0.0560479998588562
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,1,128,1,float16,fp8,3,0.024347199499607085
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,16,8,128,1,float16,fp8,16383,0.08112959861755371
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,1,128,1,float16,float16,1,0.0234592005610466
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,1,128,1,float16,fp8,1,0.024823999404907225
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,1,128,1,float16,float16,3,0.023689599335193635
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,1,128,1,float16,float16,7,0.02353599965572357
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,1,128,1,float16,fp8,7,0.024369600415229797
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,1,128,1,float16,float16,15,0.023451200127601622
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,1,128,1,float16,fp8,15,0.02446240037679672
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,1,128,1,float16,float16,31,0.0234607994556427
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,1,128,1,float16,fp8,31,0.024644799530506134
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,1,128,1,float16,float16,63,0.023532800376415253
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,1,128,1,float16,fp8,63,0.024369600415229797
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,1,128,1,float16,float16,127,0.023622399568557738
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,1,128,1,float16,fp8,127,0.024377599358558655
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,1,128,1,float16,float16,255,0.02540639936923981
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,1,128,1,float16,fp8,255,0.026311999559402464
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,1,128,1,float16,float16,511,0.02848159968852997
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,1,128,1,float16,fp8,511,0.030393600463867188
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,1,128,1,float16,float16,1023,0.051209598779678345
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,1,128,1,float16,fp8,1023,0.04330880045890808
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,1,128,1,float16,float16,2047,0.07240319848060608
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,1,128,1,float16,fp8,2047,0.058027201890945436
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,1,128,1,float16,float16,4095,0.11700320243835449
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,1,128,1,float16,fp8,4095,0.08305919766426087
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,1,128,1,float16,float16,8191,0.20492639541625976
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,2,128,1,float16,float16,1,0.02746239900588989
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,2,128,1,float16,fp8,1,0.028907200694084166
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,1,128,1,float16,fp8,8191,0.13596800565719605
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,2,128,1,float16,float16,3,0.02735840082168579
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,2,128,1,float16,fp8,3,0.029108801484107973
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,2,128,1,float16,float16,7,0.027475199103355406
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,2,128,1,float16,fp8,7,0.028729599714279175
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,2,128,1,float16,float16,15,0.027582401037216188
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,2,128,1,float16,fp8,15,0.029065600037574767
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,2,128,1,float16,float16,31,0.027428799867630006
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,2,128,1,float16,fp8,31,0.02886880040168762
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,2,128,1,float16,float16,63,0.02727360129356384
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,2,128,1,float16,fp8,63,0.029025599360466003
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,2,128,1,float16,float16,127,0.027423998713493346
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,2,128,1,float16,fp8,127,0.02884320020675659
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,2,128,1,float16,float16,255,0.03312320113182068
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,2,128,1,float16,fp8,255,0.0329120010137558
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,2,128,1,float16,float16,511,0.05142880082130432
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,2,128,1,float16,fp8,511,0.042788800597190854
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,2,128,1,float16,float16,1023,0.07687519788742066
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,2,128,1,float16,fp8,1023,0.06380000114440917
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,2,128,1,float16,float16,2047,0.11942880153656006
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,2,128,1,float16,fp8,2047,0.08729439973831177
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,2,128,1,float16,float16,4095,0.2049407958984375
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,2,128,1,float16,fp8,4095,0.13861279487609862
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,4,128,1,float16,float16,1,0.03488479852676392
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,4,128,1,float16,fp8,1,0.038134399056434634
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,2,128,1,float16,float16,8191,0.37883360385894777
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,2,128,1,float16,fp8,8191,0.24245600700378417
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,4,128,1,float16,float16,3,0.0354095995426178
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,4,128,1,float16,fp8,3,0.03770560026168823
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,4,128,1,float16,float16,7,0.03559199869632721
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,4,128,1,float16,fp8,7,0.038134399056434634
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,4,128,1,float16,float16,15,0.034835198521614076
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,4,128,1,float16,fp8,15,0.03825919926166534
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,4,128,1,float16,float16,31,0.03531999886035919
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,4,128,1,float16,fp8,31,0.03810079991817474
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,4,128,1,float16,float16,63,0.035104000568389894
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,4,128,1,float16,float16,511,0.07695840001106262
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,4,128,1,float16,fp8,63,0.037747201323509214
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,4,128,1,float16,float16,127,0.037520000338554384
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,4,128,1,float16,fp8,127,0.03871839940547943
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,4,128,1,float16,float16,255,0.05624319911003113
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,4,128,1,float16,float16,2047,0.20912959575653076
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,4,128,1,float16,fp8,255,0.04792479872703552
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,4,128,1,float16,fp8,511,0.06785759925842286
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,4,128,1,float16,float16,1023,0.12337119579315185
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,4,128,1,float16,fp8,1023,0.09688320159912109
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,4,128,1,float16,fp8,2047,0.14648959636688233
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,4,128,1,float16,float16,4095,0.3787055969238281
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,4,128,1,float16,fp8,4095,0.2445199966430664
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,8,128,1,float16,float16,1,0.049167999625205995
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,8,128,1,float16,fp8,1,0.05541279911994934
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,8,128,1,float16,float16,3,0.04926080107688904
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,4,128,1,float16,fp8,8191,0.4465439796447754
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,4,128,1,float16,float16,8191,0.7188399791717529
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,8,128,1,float16,fp8,3,0.05596320033073425
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,8,128,1,float16,float16,7,0.049239999055862425
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,8,128,1,float16,fp8,7,0.05553600192070007
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,8,128,1,float16,float16,15,0.04928160011768341
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,8,128,1,float16,fp8,15,0.05552800297737122
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,8,128,1,float16,float16,31,0.049534401297569274
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,8,128,1,float16,fp8,31,0.055529600381851195
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,8,128,1,float16,float16,63,0.050900799036026
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,8,128,1,float16,fp8,63,0.055622398853302
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,8,128,1,float16,float16,511,0.12454880475997925
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,8,128,1,float16,float16,127,0.06247360110282898
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,8,128,1,float16,fp8,127,0.05829600095748901
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,8,128,1,float16,float16,255,0.08457120060920716
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,8,128,1,float16,fp8,255,0.07933279871940613
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,8,128,1,float16,fp8,511,0.10873919725418091
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,8,128,1,float16,float16,1023,0.2158047914505005
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,8,128,1,float16,fp8,1023,0.16032639741897584
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,8,128,1,float16,float16,2047,0.3851104021072388
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,8,128,1,float16,fp8,2047,0.25794239044189454
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,1,128,1,float16,float16,1,0.029795199632644653
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,1,128,1,float16,fp8,1,0.03221119940280914
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,8,128,1,float16,fp8,4095,0.45689120292663576
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,8,128,1,float16,float16,4095,0.7181344032287598
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,1,128,1,float16,float16,3,0.030134400725364684
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,1,128,1,float16,fp8,3,0.031358399987220766
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,1,128,1,float16,float16,7,0.03033280074596405
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,1,128,1,float16,fp8,7,0.032323199510574344
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,1,128,1,float16,float16,15,0.030921599268913268
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,1,128,1,float16,fp8,15,0.03183520138263703
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,1,128,1,float16,float16,31,0.02998720109462738
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,1,128,1,float16,fp8,31,0.03195039927959442
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,1,128,1,float16,float16,63,0.030350399017333985
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,8,128,1,float16,fp8,8191,0.8584768295288085
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,1,128,1,float16,fp8,63,0.03218559920787811
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,16,8,128,1,float16,float16,8191,1.4004143714904784
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,1,128,1,float16,float16,127,0.030206400156021117
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,1,128,1,float16,fp8,127,0.03149600028991699
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,1,128,1,float16,float16,255,0.03693599998950958
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,1,128,1,float16,fp8,255,0.03547520041465759
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,1,128,1,float16,float16,511,0.05801600217819214
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,1,128,1,float16,fp8,511,0.04691999852657318
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,1,128,1,float16,fp8,1023,0.06645439863204956
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,1,128,1,float16,float16,1023,0.09016000032424927
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,1,128,1,float16,float16,2047,0.12311840057373047
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,1,128,1,float16,fp8,2047,0.09064800143241883
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,2,128,1,float16,float16,1,0.03773599863052368
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,2,128,1,float16,fp8,1,0.0404911994934082
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,2,128,1,float16,float16,3,0.03780319988727569
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,2,128,1,float16,fp8,3,0.04074560105800629
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,2,128,1,float16,float16,7,0.03772799968719483
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,2,128,1,float16,fp8,7,0.04106239974498749
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,2,128,1,float16,float16,15,0.03781920075416565
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,2,128,1,float16,fp8,15,0.04105120003223419
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,2,128,1,float16,float16,31,0.03761120140552521
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,2,128,1,float16,fp8,31,0.04048320055007935
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,2,128,1,float16,fp8,255,0.052611202001571655
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,2,128,1,float16,float16,63,0.0377375990152359
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,2,128,1,float16,fp8,63,0.04085119962692261
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,2,128,1,float16,float16,127,0.041652798652648926
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,2,128,1,float16,fp8,127,0.04068160057067871
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,2,128,1,float16,float16,255,0.05983999967575073
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,2,128,1,float16,float16,511,0.08174239993095397
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,2,128,1,float16,fp8,511,0.07100319862365723
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,2,128,1,float16,float16,1023,0.12778559923171998
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,2,128,1,float16,fp8,1023,0.09946240186691284
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,2,128,1,float16,float16,2047,0.2117072105407715
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,4,128,1,float16,float16,1,0.051953601837158206
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,2,128,1,float16,fp8,2047,0.14908159971237184
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,4,128,1,float16,fp8,1,0.058715200424194335
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,4,128,1,float16,float16,3,0.052779197692871094
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,4,128,1,float16,fp8,3,0.05841919779777527
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,4,128,1,float16,float16,7,0.052147197723388675
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,4,128,1,float16,fp8,7,0.058324801921844485
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,4,128,1,float16,float16,15,0.05249119997024536
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,4,128,1,float16,fp8,15,0.057956802845001223
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,4,128,1,float16,float16,31,0.0525551974773407
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,4,128,1,float16,fp8,31,0.05876320004463196
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,4,128,1,float16,float16,63,0.055667197704315184
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,4,128,1,float16,fp8,63,0.05853599905967712
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,4,128,1,float16,float16,127,0.06307839751243591
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,4,128,1,float16,fp8,127,0.06136000156402588
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,4,128,1,float16,float16,255,0.08736000061035157
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,4,128,1,float16,fp8,255,0.08264639973640442
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,4,128,1,float16,float16,511,0.1291632056236267
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,4,128,1,float16,fp8,511,0.11228959560394287
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,4,128,1,float16,float16,1023,0.21726720333099364
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,4,128,1,float16,fp8,1023,0.16520960330963136
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,8,128,1,float16,float16,1,0.08095200061798095
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,4,128,1,float16,float16,2047,0.38533759117126465
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,4,128,1,float16,fp8,2047,0.26112799644470214
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,8,128,1,float16,fp8,1,0.09438080191612244
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,8,128,1,float16,float16,3,0.08083360195159912
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,8,128,1,float16,fp8,3,0.09423999786376953
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,8,128,1,float16,float16,7,0.08061599731445312
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,8,128,1,float16,fp8,7,0.09461119771003723
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,8,128,1,float16,float16,15,0.08072959780693054
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,8,128,1,float16,fp8,15,0.09369440078735351
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,8,128,1,float16,float16,31,0.08128160238265991
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,8,128,1,float16,fp8,31,0.09472799897193909
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,8,128,1,float16,float16,63,0.08920639753341675
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,8,128,1,float16,fp8,63,0.09812639951705933
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,8,128,1,float16,float16,127,0.09959359765052796
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,8,128,1,float16,fp8,127,0.1085584044456482
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,8,128,1,float16,float16,255,0.1395359992980957
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,8,128,1,float16,fp8,255,0.1340831995010376
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,8,128,1,float16,float16,511,0.2263871908187866
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,8,128,1,float16,fp8,511,0.19418079853057862
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,8,128,1,float16,float16,1023,0.39046080112457277
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,8,128,1,float16,fp8,1023,0.2968575954437256
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,1,128,1,float16,float16,1,0.04446240067481995
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,1,128,1,float16,fp8,1,0.047619199752807616
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,1,128,1,float16,float16,3,0.044915199279785156
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,8,128,1,float16,float16,2047,0.7309999942779541
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,1,128,1,float16,fp8,3,0.047712001204490664
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,16,8,128,1,float16,fp8,2047,0.4853807926177979
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,1,128,1,float16,float16,7,0.04468640089035034
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,1,128,1,float16,fp8,7,0.04796960055828094
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,1,128,1,float16,float16,15,0.04447360038757324
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,1,128,1,float16,fp8,15,0.04816479980945587
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,1,128,1,float16,float16,31,0.04488480091094971
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,1,128,1,float16,fp8,31,0.04756479859352112
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,1,128,1,float16,float16,63,0.044249600172042845
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,1,128,1,float16,fp8,63,0.04792479872703552
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,1,128,1,float16,float16,127,0.048158401250839235
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,1,128,1,float16,fp8,127,0.04774079918861389
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,1,128,1,float16,float16,255,0.06618720293045044
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,1,128,1,float16,fp8,255,0.05883839726448059
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,1,128,1,float16,float16,511,0.08890720009803772
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,1,128,1,float16,fp8,511,0.07818400263786315
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,1,128,1,float16,float16,1023,0.1365455985069275
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,2,128,1,float16,float16,1,0.05915679931640625
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,1,128,1,float16,fp8,1023,0.1083232045173645
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,2,128,1,float16,fp8,1,0.06460319757461548
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,2,128,1,float16,float16,3,0.05904160141944885
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,2,128,1,float16,fp8,3,0.06508960127830506
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,2,128,1,float16,float16,7,0.059087997674942015
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,2,128,1,float16,fp8,7,0.0644432008266449
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,2,128,1,float16,float16,15,0.05907999873161316
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,2,128,1,float16,fp8,15,0.06513919830322265
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,2,128,1,float16,float16,31,0.05925599932670593
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,2,128,1,float16,fp8,31,0.06479359865188598
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,2,128,1,float16,float16,63,0.06401439905166625
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,2,128,1,float16,float16,511,0.13789440393447877
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,2,128,1,float16,fp8,63,0.06490880250930786
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,2,128,1,float16,float16,127,0.07104160189628601
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,2,128,1,float16,fp8,127,0.06763359904289246
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,2,128,1,float16,float16,255,0.09425119757652282
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,4,128,1,float16,fp8,1,0.10021120309829712
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,2,128,1,float16,fp8,255,0.08892319798469543
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,2,128,1,float16,fp8,511,0.11802239418029785
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,2,128,1,float16,float16,1023,0.22614240646362305
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,4,128,1,float16,float16,1,0.08767039775848388
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,2,128,1,float16,fp8,1023,0.1739583969116211
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,4,128,1,float16,float16,3,0.08706560134887695
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,4,128,1,float16,fp8,3,0.10016000270843506
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,4,128,1,float16,float16,7,0.08769599795341491
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,4,128,1,float16,fp8,7,0.0998687982559204
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,4,128,1,float16,float16,15,0.08662880063056946
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,4,128,1,float16,fp8,15,0.1000432014465332
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,4,128,1,float16,fp8,127,0.11298079490661621
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,4,128,1,float16,float16,31,0.08744159936904908
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,4,128,1,float16,fp8,31,0.10072640180587769
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,4,128,1,float16,float16,63,0.0944591999053955
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,4,128,1,float16,fp8,63,0.10493919849395753
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,4,128,1,float16,fp8,511,0.19970719814300536
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,4,128,1,float16,float16,127,0.10133600234985352
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,4,128,1,float16,float16,255,0.1445263981819153
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,4,128,1,float16,fp8,255,0.13977760076522827
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,4,128,1,float16,float16,511,0.23092958927154542
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,8,128,1,float16,float16,1,0.14141279458999634
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,4,128,1,float16,float16,1023,0.39649438858032227
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,8,128,1,float16,fp8,1,0.16892800331115723
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,4,128,1,float16,fp8,1023,0.3006848096847534
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,8,128,1,float16,float16,3,0.14144959449768066
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,8,128,1,float16,fp8,3,0.1678015947341919
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,8,128,1,float16,float16,7,0.1412320017814636
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,8,128,1,float16,fp8,7,0.16837600469589234
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,8,128,1,float16,float16,15,0.14160159826278687
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,8,128,1,float16,fp8,15,0.16833759546279908
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,8,128,1,float16,float16,31,0.1515247941017151
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,8,128,1,float16,fp8,31,0.16759519577026366
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,8,128,1,float16,float16,63,0.153983998298645
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,8,128,1,float16,fp8,63,0.18152960538864135
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,8,128,1,float16,float16,127,0.16275839805603026
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,8,128,1,float16,fp8,127,0.1912832021713257
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,8,128,1,float16,float16,255,0.2395872116088867
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,8,128,1,float16,fp8,255,0.24034879207611085
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,8,128,1,float16,float16,511,0.43163042068481444
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,1,128,1,float16,float16,3,0.02033279985189438
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,1,128,1,float16,float16,1,0.020211200416088104
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,8,128,1,float16,fp8,511,0.35901598930358886
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,1,128,1,float16,fp8,1,0.021007999777793884
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,1,128,1,float16,fp8,3,0.021087999641895293
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,1,128,1,float16,fp8,15,0.02072799950838089
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,1,128,1,float16,fp8,7,0.02118239998817444
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,8,128,1,float16,float16,1023,0.7396895885467529
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,1,128,1,float16,float16,7,0.020340800285339355
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,16,8,128,1,float16,fp8,1023,0.5651008129119873
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,1,128,1,float16,float16,15,0.02021919935941696
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,1,128,1,float16,float16,31,0.02014400064945221
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,1,128,1,float16,float16,255,0.021631999313831328
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,1,128,1,float16,fp8,31,0.02074880003929138
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,1,128,1,float16,float16,63,0.02030239999294281
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,1,128,1,float16,fp8,63,0.021297599375247955
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,1,128,1,float16,float16,127,0.020235200226306916
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,1,128,1,float16,fp8,127,0.021476800739765167
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,1,128,1,float16,fp8,255,0.023086400330066682
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,1,128,1,float16,float16,511,0.024817599356174468
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,1,128,1,float16,fp8,511,0.026321598887443544
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,1,128,1,float16,float16,1023,0.024745599925518037
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,1,128,1,float16,fp8,1023,0.026072001457214354
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,1,128,1,float16,float16,2047,0.025363200902938844
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,1,128,1,float16,fp8,2047,0.026576000452041625
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,1,128,1,float16,float16,4095,0.027134400606155396
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,1,128,1,float16,fp8,16383,0.03816480040550232
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,1,128,1,float16,fp8,4095,0.02757439911365509
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,1,128,1,float16,float16,8191,0.030711999535560607
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,1,128,1,float16,fp8,8191,0.031414398550987245
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,1,128,1,float16,float16,16383,0.0507968008518219
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,2,128,1,float16,float16,1,0.020395199954509734
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,2,128,1,float16,fp8,1,0.021052800118923187
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,2,128,1,float16,float16,3,0.02019200026988983
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,2,128,1,float16,fp8,3,0.021049599349498748
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,2,128,1,float16,float16,7,0.020239999890327452
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,2,128,1,float16,fp8,7,0.02118239998817444
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,2,128,1,float16,float16,15,0.020577600598335265
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,2,128,1,float16,fp8,15,0.021252800524234772
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,2,128,1,float16,float16,31,0.02006080001592636
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,2,128,1,float16,fp8,31,0.021320000290870667
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,2,128,1,float16,float16,63,0.0204815998673439
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,2,128,1,float16,fp8,63,0.021014399826526642
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,2,128,1,float16,float16,127,0.020207999646663664
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,2,128,1,float16,fp8,127,0.021092799305915833
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,2,128,1,float16,float16,255,0.02204640060663223
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,2,128,1,float16,fp8,255,0.02337760031223297
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,2,128,1,float16,float16,511,0.025139200687408447
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,2,128,1,float16,fp8,511,0.026956799626350402
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,2,128,1,float16,fp8,4095,0.030726400017738343
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,2,128,1,float16,float16,1023,0.025006398558616638
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,2,128,1,float16,fp8,1023,0.026174399256706237
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,2,128,1,float16,float16,2047,0.026824000477790832
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,2,128,1,float16,fp8,16383,0.05752159953117371
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,2,128,1,float16,fp8,2047,0.027375999093055724
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,2,128,1,float16,float16,4095,0.03240320086479187
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,2,128,1,float16,float16,8191,0.049055999517440795
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,2,128,1,float16,fp8,8191,0.03903839886188507
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,2,128,1,float16,float16,16383,0.07108319997787475
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,4,128,1,float16,fp8,7,0.021367999911308288
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,4,128,1,float16,float16,1,0.020572799444198608
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,4,128,1,float16,fp8,1,0.02128639966249466
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,4,128,1,float16,float16,3,0.02073120027780533
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,4,128,1,float16,fp8,3,0.0213919997215271
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,4,128,1,float16,float16,7,0.020796799659729005
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,4,128,1,float16,float16,15,0.020667199790477753
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,4,128,1,float16,fp8,15,0.0216511994600296
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,4,128,1,float16,float16,31,0.020499199628829956
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,4,128,1,float16,fp8,31,0.02149759978055954
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,4,128,1,float16,float16,63,0.020449599623680113
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,4,128,1,float16,fp8,63,0.02127680033445358
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,4,128,1,float16,float16,127,0.02056480050086975
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,4,128,1,float16,fp8,127,0.021456000208854676
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,4,128,1,float16,fp8,1023,0.026771199703216553
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,4,128,1,float16,float16,255,0.02235199958086014
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,4,128,1,float16,fp8,255,0.023505599796772005
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,4,128,1,float16,float16,511,0.025484800338745117
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,4,128,1,float16,fp8,511,0.027055999636650084
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,4,128,1,float16,float16,1023,0.02638559937477112
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,4,128,1,float16,fp8,8191,0.056360000371932985
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,4,128,1,float16,float16,2047,0.028723201155662535
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,4,128,1,float16,fp8,2047,0.030478399991989136
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,4,128,1,float16,float16,4095,0.04894079864025116
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,4,128,1,float16,fp8,4095,0.04134880006313324
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,4,128,1,float16,float16,8191,0.07115359902381897
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,4,128,1,float16,float16,16383,0.11469440460205078
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,4,128,1,float16,fp8,16383,0.0813696026802063
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,8,128,1,float16,float16,1,0.020897600054740905
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,8,128,1,float16,fp8,1,0.02154559940099716
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,8,128,1,float16,float16,3,0.020870399475097657
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,8,128,1,float16,fp8,3,0.021748800575733186
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,8,128,1,float16,float16,7,0.020751999318599702
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,8,128,1,float16,fp8,7,0.02157759964466095
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,8,128,1,float16,float16,15,0.02101919949054718
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,8,128,1,float16,fp8,15,0.02146719992160797
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,8,128,1,float16,float16,31,0.02081120014190674
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,8,128,1,float16,fp8,31,0.021411199867725373
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,8,128,1,float16,float16,63,0.02067359983921051
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,8,128,1,float16,fp8,63,0.021772800385951994
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,8,128,1,float16,float16,127,0.0208624005317688
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,8,128,1,float16,fp8,127,0.0216511994600296
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,8,128,1,float16,float16,255,0.022625599801540375
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,8,128,1,float16,fp8,255,0.02356639951467514
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,8,128,1,float16,float16,511,0.025760000944137572
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,8,128,1,float16,fp8,511,0.02725279927253723
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,8,128,1,float16,float16,1023,0.029875200986862183
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,8,128,1,float16,fp8,1023,0.030670401453971863
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,8,128,1,float16,float16,2047,0.04907999932765961
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,8,128,1,float16,fp8,2047,0.037324801087379456
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,8,128,1,float16,float16,4095,0.07109439969062806
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,8,128,1,float16,fp8,4095,0.05716480016708374
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,8,128,1,float16,float16,8191,0.1142799973487854
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,8,128,1,float16,fp8,8191,0.08150560259819031
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,8,128,1,float16,float16,16383,0.20055840015411378
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,1,128,1,float16,float16,1,0.0659503996372223
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,16,8,128,1,float16,fp8,16383,0.1331552028656006
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,1,128,1,float16,fp8,1,0.0721552014350891
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,1,128,1,float16,float16,3,0.06616960167884826
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,1,128,1,float16,fp8,15,0.07252960205078125
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,1,128,1,float16,fp8,3,0.07209759950637817
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,1,128,1,float16,float16,7,0.06599199771881104
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,1,128,1,float16,fp8,7,0.07308800220489502
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,1,128,1,float16,float16,15,0.06628479957580566
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,1,128,1,float16,float16,127,0.07845119833946228
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,1,128,1,float16,float16,31,0.06606559753417969
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,1,128,1,float16,fp8,31,0.0722495973110199
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,1,128,1,float16,float16,63,0.07407519817352295
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,1,128,1,float16,fp8,63,0.0723136007785797
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,1,128,1,float16,fp8,127,0.080348801612854
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,1,128,1,float16,float16,255,0.10076960325241088
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,1,128,1,float16,fp8,255,0.09550560116767884
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,1,128,1,float16,float16,511,0.14295680522918702
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,1,128,1,float16,fp8,511,0.12753280401229858
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,2,128,1,float16,float16,1,0.09530400037765503
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,2,128,1,float16,fp8,7,0.10658080577850342
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,2,128,1,float16,fp8,1,0.10613440275192261
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,2,128,1,float16,float16,3,0.09561600089073181
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,2,128,1,float16,fp8,3,0.10572479963302613
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,2,128,1,float16,float16,7,0.09446560144424439
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,2,128,1,float16,float16,63,0.1045456051826477
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,2,128,1,float16,float16,15,0.09427520036697387
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,2,128,1,float16,fp8,15,0.10660799741744995
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,2,128,1,float16,float16,31,0.09722560048103332
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,2,128,1,float16,fp8,31,0.1062432050704956
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,2,128,1,float16,fp8,63,0.11396000385284424
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,2,128,1,float16,float16,127,0.1093951940536499
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,2,128,1,float16,float16,511,0.23757119178771974
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,2,128,1,float16,fp8,127,0.11975359916687012
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,2,128,1,float16,float16,255,0.1531551957130432
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,2,128,1,float16,fp8,255,0.14679360389709473
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,2,128,1,float16,fp8,511,0.20797441005706788
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,4,128,1,float16,float16,1,0.14972319602966308
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,4,128,1,float16,fp8,1,0.1748639941215515
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,4,128,1,float16,float16,3,0.15056159496307372
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,4,128,1,float16,fp8,3,0.1747712016105652
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,4,128,1,float16,float16,7,0.1513360023498535
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,4,128,1,float16,fp8,7,0.17499519586563111
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,4,128,1,float16,float16,15,0.15181599855422973
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,4,128,1,float16,fp8,15,0.1749727964401245
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,4,128,1,float16,float16,31,0.1574031949043274
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,4,128,1,float16,fp8,31,0.17944320440292358
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,4,128,1,float16,float16,255,0.23969919681549073
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,4,128,1,float16,float16,63,0.16093120574951172
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,4,128,1,float16,fp8,63,0.1882159948348999
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,4,128,1,float16,float16,127,0.17251839637756347
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,4,128,1,float16,fp8,127,0.19270880222320558
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,4,128,1,float16,fp8,255,0.24608640670776366
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,8,128,1,float16,float16,1,0.2590208053588867
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,4,128,1,float16,float16,511,0.41415038108825686
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,4,128,1,float16,fp8,511,0.36749920845031736
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,8,128,1,float16,fp8,1,0.30703999996185305
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,8,128,1,float16,float16,3,0.25954399108886717
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,8,128,1,float16,fp8,3,0.3103935956954956
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,8,128,1,float16,float16,7,0.2576080083847046
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,8,128,1,float16,fp8,7,0.3094896078109741
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,8,128,1,float16,float16,15,0.26958560943603516
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,8,128,1,float16,fp8,15,0.3082911968231201
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,8,128,1,float16,float16,31,0.27669761180877683
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,8,128,1,float16,fp8,31,0.33333919048309324
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,8,128,1,float16,float16,63,0.28089919090271
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,8,128,1,float16,fp8,63,0.33647680282592773
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,8,128,1,float16,float16,127,0.3007983922958374
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,8,128,1,float16,fp8,127,0.3463903903961182
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,8,128,1,float16,float16,255,0.4411280155181885
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,8,128,1,float16,fp8,255,0.44484801292419435
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,1,128,1,float16,float16,1,0.10669759511947632
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,1,128,1,float16,fp8,1,0.11780799627304077
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,1,128,1,float16,float16,3,0.10575679540634156
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,1,128,1,float16,fp8,3,0.11782079935073853
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,8,128,1,float16,float16,511,0.8330783843994141
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,16,8,128,1,float16,fp8,511,0.6797344207763671
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,1,128,1,float16,float16,7,0.10516159534454346
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,1,128,1,float16,fp8,7,0.11655839681625366
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,1,128,1,float16,float16,15,0.10639519691467285
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,1,128,1,float16,fp8,15,0.11787519454956055
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,1,128,1,float16,float16,31,0.11009440422058106
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,1,128,1,float16,fp8,31,0.11892000436782837
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,1,128,1,float16,float16,63,0.12106399536132813
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,2,128,1,float16,float16,1,0.160315203666687
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,1,128,1,float16,fp8,63,0.1289039969444275
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,1,128,1,float16,float16,127,0.1231279969215393
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,1,128,1,float16,fp8,127,0.13072479963302613
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,1,128,1,float16,float16,255,0.16416159868240357
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,2,128,1,float16,float16,7,0.15961600542068483
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,1,128,1,float16,fp8,255,0.15780320167541503
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,2,128,1,float16,fp8,1,0.18291200399398805
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,2,128,1,float16,float16,3,0.15979520082473755
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,2,128,1,float16,fp8,3,0.18271520137786865
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,2,128,1,float16,fp8,7,0.18290879726409912
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,2,128,1,float16,float16,15,0.16256799697875976
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,2,128,1,float16,float16,31,0.1681696057319641
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,2,128,1,float16,fp8,15,0.1846783995628357
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,2,128,1,float16,fp8,31,0.18731520175933838
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,2,128,1,float16,float16,63,0.1731951951980591
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,2,128,1,float16,fp8,63,0.19873119592666627
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,2,128,1,float16,float16,127,0.18239840269088745
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,2,128,1,float16,fp8,127,0.20263841152191162
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,2,128,1,float16,float16,255,0.2602560043334961
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,2,128,1,float16,fp8,255,0.25633599758148196
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,4,128,1,float16,float16,1,0.2748176097869873
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,4,128,1,float16,fp8,1,0.32113759517669677
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,4,128,1,float16,float16,3,0.2739583969116211
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,4,128,1,float16,fp8,3,0.320417594909668
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,4,128,1,float16,float16,7,0.2762592077255249
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,4,128,1,float16,fp8,7,0.3206831932067871
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,4,128,1,float16,float16,15,0.28394720554351804
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,4,128,1,float16,fp8,15,0.324017596244812
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,4,128,1,float16,float16,31,0.2862864017486572
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,4,128,1,float16,fp8,31,0.3409264087677002
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,4,128,1,float16,float16,63,0.28965919017791747
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,4,128,1,float16,fp8,63,0.3456768035888672
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,4,128,1,float16,float16,127,0.3057487964630127
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,4,128,1,float16,fp8,127,0.35540640354156494
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,4,128,1,float16,float16,255,0.4414463996887207
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,4,128,1,float16,fp8,255,0.4539055824279785
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,8,128,1,float16,float16,1,0.496504020690918
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,8,128,1,float16,fp8,1,0.5822239875793457
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,8,128,1,float16,float16,7,0.5101903915405274
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,8,128,1,float16,float16,3,0.5040128231048584
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,8,128,1,float16,fp8,3,0.585643196105957
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,8,128,1,float16,fp8,7,0.5861631870269776
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,8,128,1,float16,float16,15,0.5143152236938476
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,8,128,1,float16,fp8,15,0.6156608104705811
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,8,128,1,float16,float16,31,0.5183263778686523
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,8,128,1,float16,fp8,31,0.633132791519165
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,8,128,1,float16,float16,63,0.5229663848876953
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,8,128,1,float16,fp8,63,0.6326576232910156
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,1,128,1,float16,float16,1,0.020268799364566804
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,8,128,1,float16,float16,127,0.5603280067443848
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,1,128,1,float16,fp8,3,0.02091519981622696
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,1,128,1,float16,fp8,1,0.02107519954442978
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,1,128,1,float16,float16,3,0.020468799769878386
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,8,128,1,float16,fp8,127,0.6473696231842041
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,1,128,1,float16,float16,7,0.020241600275039674
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,1,128,1,float16,fp8,7,0.02131039947271347
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,1,128,1,float16,float16,63,0.020212799310684204
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,1,128,1,float16,float16,15,0.020443199574947356
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,8,128,1,float16,float16,255,0.8333680152893066
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,1,128,1,float16,fp8,15,0.021139200031757354
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,16,8,128,1,float16,fp8,255,0.8542351722717285
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,1,128,1,float16,float16,31,0.020393599569797517
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,1,128,1,float16,fp8,31,0.02096640020608902
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,1,128,1,float16,fp8,63,0.021012799441814424
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,1,128,1,float16,float16,127,0.02017119973897934
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,1,128,1,float16,fp8,127,0.021065600216388702
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,1,128,1,float16,float16,255,0.02158239930868149
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,1,128,1,float16,fp8,255,0.022947199642658234
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,1,128,1,float16,float16,511,0.025161600112915038
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,1,128,1,float16,fp8,511,0.027057600021362305
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,1,128,1,float16,float16,1023,0.024825599789619446
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,1,128,1,float16,float16,8191,0.049804800748825075
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,1,128,1,float16,fp8,1023,0.025760000944137572
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,1,128,1,float16,float16,2047,0.026927998661994933
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,1,128,1,float16,fp8,2047,0.02696160078048706
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,1,128,1,float16,float16,4095,0.031119999289512635
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,1,128,1,float16,fp8,4095,0.030262398719787597
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,1,128,1,float16,fp8,8191,0.037324801087379456
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,1,128,1,float16,float16,16383,0.0714847981929779
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,1,128,1,float16,fp8,16383,0.05701919794082642
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,2,128,1,float16,float16,1,0.020505599677562714
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,2,128,1,float16,fp8,1,0.021531200408935545
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,2,128,1,float16,float16,3,0.020644800364971162
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,2,128,1,float16,fp8,3,0.02099040001630783
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,2,128,1,float16,float16,7,0.020473599433898926
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,2,128,1,float16,fp8,7,0.021542400121688843
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,2,128,1,float16,float16,15,0.020729599893093108
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,2,128,1,float16,fp8,15,0.021452799439430237
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,2,128,1,float16,float16,31,0.02054399996995926
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,2,128,1,float16,fp8,31,0.021704000234603883
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,2,128,1,float16,float16,63,0.02048960030078888
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,2,128,1,float16,fp8,63,0.021185599267482758
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,2,128,1,float16,float16,127,0.02056480050086975
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,2,128,1,float16,fp8,127,0.021609599888324737
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,2,128,1,float16,float16,255,0.022383999824523926
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,2,128,1,float16,fp8,255,0.023396800458431243
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,2,128,1,float16,fp8,2047,0.030588799715042116
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,2,128,1,float16,float16,511,0.02534399926662445
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,2,128,1,float16,fp8,511,0.02688960134983063
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,2,128,1,float16,float16,1023,0.02648960053920746
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,2,128,1,float16,fp8,1023,0.02667680084705353
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,2,128,1,float16,float16,2047,0.02875039875507355
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,2,128,1,float16,float16,4095,0.049635198712348935
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,2,128,1,float16,fp8,4095,0.04105440080165863
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,2,128,1,float16,float16,8191,0.071043199300766
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,2,128,1,float16,fp8,8191,0.056766402721405027
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,2,128,1,float16,float16,16383,0.11638079881668091
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,2,128,1,float16,fp8,16383,0.08224480152130127
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,4,128,1,float16,float16,1,0.02088800072669983
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,4,128,1,float16,fp8,1,0.021622399985790252
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,4,128,1,float16,float16,3,0.020897600054740905
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,4,128,1,float16,fp8,3,0.021695999801158904
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,4,128,1,float16,float16,7,0.020900799334049223
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,4,128,1,float16,fp8,7,0.02160319983959198
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,4,128,1,float16,float16,15,0.02067359983921051
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,4,128,1,float16,fp8,15,0.02173279970884323
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,4,128,1,float16,float16,31,0.020827199518680572
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,4,128,1,float16,fp8,31,0.021750399470329286
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,4,128,1,float16,float16,63,0.020739200711250304
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,4,128,1,float16,fp8,63,0.02188960015773773
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,4,128,1,float16,float16,127,0.020947200059890748
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,4,128,1,float16,fp8,127,0.021833600103855134
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,4,128,1,float16,float16,255,0.02290399968624115
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,4,128,1,float16,fp8,255,0.02383359968662262
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,4,128,1,float16,float16,511,0.025809600949287415
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,4,128,1,float16,fp8,511,0.027526399493217467
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,4,128,1,float16,float16,1023,0.031780800223350524
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,4,128,1,float16,fp8,1023,0.030344000458717345
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,4,128,1,float16,float16,2047,0.04939840137958527
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,4,128,1,float16,fp8,2047,0.037380799651145935
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,4,128,1,float16,float16,4095,0.07153120040893554
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,4,128,1,float16,fp8,4095,0.056884801387786864
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,4,128,1,float16,float16,8191,0.11523200273513794
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,4,128,1,float16,fp8,8191,0.08262879848480224
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,4,128,1,float16,float16,16383,0.20132639408111572
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,8,128,1,float16,float16,1,0.02094080001115799
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,4,128,1,float16,fp8,16383,0.13315839767456056
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,8,128,1,float16,fp8,1,0.021961599588394165
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,8,128,1,float16,float16,3,0.02110079973936081
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,8,128,1,float16,fp8,3,0.02202560007572174
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,8,128,1,float16,float16,7,0.021147200465202333
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,8,128,1,float16,fp8,7,0.022006399929523468
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,8,128,1,float16,float16,15,0.02109760046005249
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,8,128,1,float16,fp8,15,0.021859200298786165
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,8,128,1,float16,float16,31,0.02123039960861206
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,8,128,1,float16,fp8,31,0.021907199919223786
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,8,128,1,float16,float16,63,0.021134400367736818
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,8,128,1,float16,fp8,255,0.024145600199699403
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,8,128,1,float16,fp8,63,0.021939200162887574
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,8,128,1,float16,float16,127,0.02123039960861206
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,8,128,1,float16,fp8,127,0.022116799652576447
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,8,128,1,float16,float16,255,0.023051199316978455
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,8,128,1,float16,float16,511,0.026708799600601196
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,8,128,1,float16,float16,1023,0.04814879894256592
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,8,128,1,float16,fp8,511,0.028190401196479798
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,8,128,1,float16,fp8,1023,0.03850080072879791
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,8,128,1,float16,float16,8191,0.19952640533447266
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,8,128,1,float16,float16,2047,0.07044159770011901
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,8,128,1,float16,fp8,2047,0.05556640028953552
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,8,128,1,float16,float16,4095,0.11315679550170898
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,8,128,1,float16,fp8,4095,0.08110560178756714
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,8,128,1,float16,fp8,8191,0.13201600313186646
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,1,128,1,float16,float16,1,0.021860800683498383
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,1,128,1,float16,fp8,1,0.022745600342750548
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,1,128,1,float16,fp8,7,0.022884799540042876
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,1,128,1,float16,float16,15,0.021963199973106383
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,8,128,1,float16,float16,16383,0.3724272012710571
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,16,8,128,1,float16,fp8,16383,0.23096959590911864
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,1,128,1,float16,float16,3,0.022012799978256226
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,1,128,1,float16,fp8,3,0.022729599475860597
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,1,128,1,float16,float16,7,0.021908800303936004
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,1,128,1,float16,fp8,15,0.022732800245285033
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,1,128,1,float16,fp8,127,0.022668799757957457
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,1,128,1,float16,float16,31,0.022023999691009523
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,1,128,1,float16,fp8,31,0.022716799378395082
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,1,128,1,float16,float16,63,0.022023999691009523
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,1,128,1,float16,fp8,63,0.02287999987602234
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,1,128,1,float16,float16,127,0.021956799924373625
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,1,128,1,float16,float16,255,0.02364639937877655
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,1,128,1,float16,fp8,255,0.02476480007171631
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,1,128,1,float16,float16,511,0.02698560059070587
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,1,128,1,float16,fp8,511,0.0282943993806839
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,1,128,1,float16,float16,1023,0.030502399802207945
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,1,128,1,float16,fp8,1023,0.031353598833084105
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,1,128,1,float16,float16,2047,0.050659197568893435
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,1,128,1,float16,fp8,2047,0.04040960073471069
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,1,128,1,float16,float16,4095,0.07296159863471985
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,1,128,1,float16,fp8,4095,0.05809280276298523
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,1,128,1,float16,float16,8191,0.11654239892959595
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,1,128,1,float16,fp8,8191,0.0834384024143219
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,1,128,1,float16,float16,16383,0.20327360630035402
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,1,128,1,float16,fp8,16383,0.13495199680328368
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,2,128,1,float16,float16,1,0.022230400145053862
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,1,128,1,float16,float16,32767,0.3790544033050537
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,1,128,1,float16,fp8,32767,0.2351855993270874
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,2,128,1,float16,fp8,1,0.023030400276184082
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,2,128,1,float16,float16,3,0.022089600563049316
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,2,128,1,float16,fp8,3,0.023307199776172637
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,2,128,1,float16,float16,7,0.022364799678325654
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,2,128,1,float16,fp8,7,0.02317280024290085
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,2,128,1,float16,float16,15,0.022200000286102296
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,2,128,1,float16,fp8,15,0.023056000471115112
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,2,128,1,float16,float16,31,0.02231840044260025
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,2,128,1,float16,fp8,31,0.022912000119686127
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,2,128,1,float16,float16,63,0.022257600724697114
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,2,128,1,float16,fp8,63,0.023137600719928743
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,2,128,1,float16,fp8,511,0.028681600093841554
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,2,128,1,float16,float16,127,0.02258719950914383
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,2,128,1,float16,fp8,127,0.02316479980945587
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,2,128,1,float16,fp8,1023,0.037520000338554384
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,2,128,1,float16,float16,255,0.02419999986886978
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,2,128,1,float16,fp8,255,0.025153601169586183
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,2,128,1,float16,float16,511,0.029679998755455017
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,2,128,1,float16,float16,1023,0.04986560046672821
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,2,128,1,float16,float16,2047,0.0715183973312378
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,2,128,1,float16,fp8,2047,0.05716000199317932
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,2,128,1,float16,float16,4095,0.11573760509490967
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,2,128,1,float16,fp8,4095,0.08313440084457398
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,2,128,1,float16,float16,8191,0.2019263982772827
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,2,128,1,float16,fp8,8191,0.1336176037788391
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,2,128,1,float16,float16,16383,0.3755120038986206
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,2,128,1,float16,fp8,16383,0.23605918884277344
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,4,128,1,float16,float16,1,0.026156800985336303
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,4,128,1,float16,fp8,1,0.027577599883079527
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,4,128,1,float16,float16,3,0.025953599810600282
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,2,128,1,float16,fp8,32767,0.4301568031311035
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,4,128,1,float16,fp8,3,0.0273391991853714
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,2,128,1,float16,float16,32767,0.7205376148223877
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,4,128,1,float16,float16,7,0.025953599810600282
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,4,128,1,float16,fp8,7,0.027475199103355406
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,4,128,1,float16,float16,15,0.026025599241256712
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,4,128,1,float16,fp8,15,0.027614399790763855
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,4,128,1,float16,float16,31,0.02598879933357239
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,4,128,1,float16,fp8,31,0.027374398708343507
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,4,128,1,float16,float16,63,0.025993600487709045
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,4,128,1,float16,fp8,63,0.02773439884185791
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,4,128,1,float16,float16,127,0.02621760070323944
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,4,128,1,float16,fp8,127,0.02784000039100647
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,4,128,1,float16,float16,255,0.029967999458312987
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,4,128,1,float16,fp8,255,0.031595200300216675
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,4,128,1,float16,float16,2047,0.11806720495223999
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,4,128,1,float16,float16,511,0.04864319860935211
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,4,128,1,float16,fp8,511,0.04102079868316651
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,4,128,1,float16,float16,1023,0.07430239915847778
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,4,128,1,float16,fp8,1023,0.06188639998435974
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,4,128,1,float16,fp8,2047,0.08591520190238952
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,4,128,1,float16,float16,4095,0.203873610496521
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,4,128,1,float16,fp8,4095,0.1368623971939087
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,4,128,1,float16,float16,8191,0.37267680168151857
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,4,128,1,float16,fp8,8191,0.23606081008911134
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,1,128,1,float16,float16,1,0.01728159934282303
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,1,128,1,float16,fp8,1,0.018089599907398224
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,1,128,1,float16,float16,7,0.017455999553203583
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,4,128,1,float16,fp8,16383,0.43384480476379395
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,4,128,1,float16,float16,16383,0.7172207832336426
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,1,128,1,float16,float16,3,0.017452800273895265
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,1,128,1,float16,fp8,3,0.0181536003947258
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,1,128,1,float16,fp8,7,0.01801760047674179
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,1,128,1,float16,float16,15,0.017441600561141968
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,1,128,1,float16,fp8,15,0.01852799952030182
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,1,128,1,float16,fp8,31,0.01844000071287155
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,1,128,1,float16,float16,31,0.017316800355911256
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,4,128,1,float16,fp8,32767,0.8231072425842285
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,1,128,1,float16,float16,63,0.017351999878883362
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,8,4,128,1,float16,float16,32767,1.4068464279174804
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,1,128,1,float16,float16,511,0.021836799383163453
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,1,128,1,float16,fp8,63,0.018262399733066557
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,1,128,1,float16,float16,127,0.0173552006483078
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,1,128,1,float16,fp8,1023,0.022651199996471406
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,1,128,1,float16,fp8,127,0.018505600094795228
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,1,128,1,float16,float16,255,0.01892320066690445
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,1,128,1,float16,fp8,255,0.0203247994184494
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,1,128,1,float16,fp8,511,0.024486400187015533
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,1,128,1,float16,float16,1023,0.021555200219154358
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,1,128,1,float16,float16,2047,0.02192640006542206
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,1,128,1,float16,fp8,2047,0.023056000471115112
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,1,128,1,float16,float16,4095,0.022324800491333008
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,1,128,1,float16,fp8,4095,0.023772799968719484
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,1,128,1,float16,float16,8191,0.023745599389076232
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,1,128,1,float16,fp8,8191,0.02481919974088669
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,1,128,1,float16,float16,16383,0.026169601082801818
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,1,128,1,float16,fp8,16383,0.026791998744010927
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,1,128,1,float16,float16,32767,0.02956640124320984
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,1,128,1,float16,fp8,32767,0.031147199869155883
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,2,128,1,float16,float16,1,0.01823360025882721
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,2,128,1,float16,fp8,1,0.019108800590038298
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,2,128,1,float16,float16,3,0.018063999712467194
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,2,128,1,float16,fp8,3,0.0192671999335289
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,2,128,1,float16,float16,7,0.01812160015106201
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,2,128,1,float16,fp8,7,0.0189423993229866
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,2,128,1,float16,float16,15,0.01831520050764084
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,2,128,1,float16,fp8,15,0.01912959963083267
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,2,128,1,float16,float16,31,0.018262399733066557
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,2,128,1,float16,fp8,31,0.019148799777030944
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,2,128,1,float16,float16,63,0.018379199504852294
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,2,128,1,float16,fp8,63,0.019257600605487823
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,2,128,1,float16,float16,127,0.01812320053577423
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,2,128,1,float16,fp8,127,0.018966400623321535
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,2,128,1,float16,float16,255,0.019787199795246124
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,2,128,1,float16,fp8,255,0.021214400231838227
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,2,128,1,float16,float16,511,0.022865599393844603
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,2,128,1,float16,fp8,511,0.025047999620437623
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,2,128,1,float16,float16,1023,0.02285120040178299
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,2,128,1,float16,fp8,1023,0.0237296000123024
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,2,128,1,float16,float16,2047,0.022777600586414336
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,2,128,1,float16,fp8,2047,0.024120000004768372
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,2,128,1,float16,float16,4095,0.023366400599479677
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,2,128,1,float16,fp8,4095,0.02468640059232712
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,2,128,1,float16,float16,8191,0.02487040013074875
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,2,128,1,float16,fp8,8191,0.026283198595047
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,2,128,1,float16,float16,16383,0.02748799920082092
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,2,128,1,float16,fp8,16383,0.027771198749542238
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,2,128,1,float16,float16,32767,0.031656000018119815
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,2,128,1,float16,fp8,32767,0.03223359882831574
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,4,128,1,float16,float16,1,0.019896000623703003
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,4,128,1,float16,fp8,1,0.020721599459648132
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,4,128,1,float16,float16,3,0.02006720006465912
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,4,128,1,float16,fp8,3,0.020908799767494202
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,4,128,1,float16,float16,7,0.019947199523448943
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,4,128,1,float16,fp8,7,0.020694400370121
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,4,128,1,float16,float16,15,0.01977919936180115
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,4,128,1,float16,fp8,15,0.020635199546813966
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,4,128,1,float16,float16,31,0.01987359970808029
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,4,128,1,float16,fp8,31,0.020729599893093108
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,4,128,1,float16,float16,63,0.019891199469566346
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,4,128,1,float16,fp8,63,0.020926399528980254
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,4,128,1,float16,float16,127,0.019763199985027312
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,4,128,1,float16,fp8,127,0.02080480009317398
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,4,128,1,float16,float16,255,0.021583999693393707
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,4,128,1,float16,fp8,255,0.022649599611759184
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,4,128,1,float16,float16,511,0.024592000246047973
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,4,128,1,float16,fp8,511,0.026550400257110595
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,4,128,1,float16,float16,1023,0.024108800292015075
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,4,128,1,float16,fp8,1023,0.02531520128250122
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,4,128,1,float16,float16,2047,0.02433120012283325
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,4,128,1,float16,fp8,2047,0.025960001349449157
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,4,128,1,float16,float16,4095,0.02519200146198273
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,4,128,1,float16,fp8,4095,0.02653760015964508
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,4,128,1,float16,float16,8191,0.027745598554611207
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,4,128,1,float16,fp8,32767,0.041731199622154234
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,4,128,1,float16,fp8,8191,0.02802720069885254
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,1,128,1,float16,float16,3,0.01823839992284775
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,4,128,1,float16,float16,16383,0.03191519975662231
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,4,128,1,float16,fp8,16383,0.03185440003871918
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,8,4,128,1,float16,float16,32767,0.05024960041046143
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,1,128,1,float16,float16,1,0.01839199960231781
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,1,128,1,float16,fp8,1,0.019275200366973878
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,1,128,1,float16,fp8,3,0.019284799695014954
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,1,128,1,float16,float16,7,0.0182096004486084
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,1,128,1,float16,fp8,7,0.018972800672054292
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,1,128,1,float16,float16,15,0.018326400220394133
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,1,128,1,float16,fp8,15,0.019182400405406953
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,1,128,1,float16,float16,31,0.01828639954328537
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,1,128,1,float16,fp8,31,0.019252799451351166
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,1,128,1,float16,float16,63,0.018169599771499633
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,1,128,1,float16,fp8,63,0.01934880018234253
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,1,128,1,float16,float16,127,0.018435199558734894
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,1,128,1,float16,fp8,127,0.01905599981546402
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,1,128,1,float16,float16,255,0.019761599600315094
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,1,128,1,float16,fp8,255,0.021227200329303742
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,1,128,1,float16,float16,511,0.02290399968624115
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,1,128,1,float16,fp8,511,0.02505280077457428
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,1,128,1,float16,float16,1023,0.022603200376033784
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,1,128,1,float16,fp8,1023,0.023940800130367278
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,1,128,1,float16,float16,2047,0.0228752002120018
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,1,128,1,float16,fp8,2047,0.024243199825286867
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,1,128,1,float16,float16,4095,0.02367040067911148
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,1,128,1,float16,fp8,4095,0.024825599789619446
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,1,128,1,float16,float16,8191,0.024937599897384644
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,1,128,1,float16,fp8,8191,0.02643519937992096
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,1,128,1,float16,float16,16383,0.027580800652503967
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,1,128,1,float16,fp8,16383,0.028369599580764772
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,1,128,1,float16,float16,32767,0.03154560029506683
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,1,128,1,float16,fp8,32767,0.03216319978237152
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,2,128,1,float16,float16,1,0.01997919976711273
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,2,128,1,float16,fp8,15,0.02088800072669983
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,2,128,1,float16,fp8,1,0.020798400044441223
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,2,128,1,float16,float16,3,0.020078399777412416
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,2,128,1,float16,fp8,3,0.020947200059890748
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,2,128,1,float16,fp8,63,0.020953600108623505
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,2,128,1,float16,float16,7,0.020180800557136537
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,2,128,1,float16,fp8,7,0.020880000293254854
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,2,128,1,float16,float16,15,0.020137600600719452
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,2,128,1,float16,float16,31,0.019913600385189058
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,2,128,1,float16,fp8,31,0.020838400721549986
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,2,128,1,float16,float16,63,0.020155200362205507
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,2,128,1,float16,float16,127,0.020110400021076204
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,2,128,1,float16,fp8,127,0.0208079993724823
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,2,128,1,float16,float16,255,0.02160640060901642
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,2,128,1,float16,fp8,255,0.02284640073776245
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,2,128,1,float16,float16,511,0.02466239929199219
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,2,128,1,float16,fp8,511,0.026897600293159483
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,2,128,1,float16,float16,1023,0.024255999922752382
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,2,128,1,float16,fp8,1023,0.025545600056648254
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,2,128,1,float16,float16,2047,0.0248879998922348
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,2,128,1,float16,fp8,2047,0.026063999533653258
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,2,128,1,float16,fp8,16383,0.03215200006961823
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,2,128,1,float16,float16,4095,0.025654399394989015
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,2,128,1,float16,fp8,4095,0.026612800359725953
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,2,128,1,float16,float16,8191,0.027952000498771667
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,2,128,1,float16,fp8,8191,0.027956798672676086
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,2,128,1,float16,float16,16383,0.03142560124397278
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,2,128,1,float16,float16,32767,0.05065760016441345
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,2,128,1,float16,fp8,32767,0.03979839980602264
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,4,128,1,float16,float16,1,0.020180800557136537
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,4,128,1,float16,fp8,1,0.02104160040616989
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,4,128,1,float16,float16,3,0.019947199523448943
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,4,128,1,float16,fp8,3,0.020844799280166627
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,4,128,1,float16,float16,7,0.020137600600719452
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,4,128,1,float16,fp8,7,0.02069920003414154
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,4,128,1,float16,float16,15,0.020233599841594695
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,4,128,1,float16,fp8,15,0.020904000103473663
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,4,128,1,float16,float16,31,0.020179200172424316
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,4,128,1,float16,fp8,31,0.021001599729061127
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,4,128,1,float16,float16,63,0.020161600410938264
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,4,128,1,float16,fp8,63,0.021278400719165803
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,4,128,1,float16,float16,127,0.01997919976711273
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,4,128,1,float16,fp8,127,0.021142399311065672
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,4,128,1,float16,float16,255,0.021779200434684752
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,4,128,1,float16,fp8,255,0.023089599609375
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,4,128,1,float16,float16,511,0.02473440021276474
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,4,128,1,float16,fp8,511,0.026451200246810913
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,4,128,1,float16,float16,1023,0.024743999540805816
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,4,128,1,float16,fp8,1023,0.025755199790000915
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,4,128,1,float16,float16,2047,0.025339201092720032
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,4,128,1,float16,fp8,2047,0.026280000805854797
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,4,128,1,float16,float16,4095,0.02693440020084381
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,4,128,1,float16,fp8,4095,0.027033600211143493
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,4,128,1,float16,float16,8191,0.030374398827552794
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,4,128,1,float16,fp8,32767,0.05815039873123169
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,4,128,1,float16,fp8,8191,0.03110400140285492
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,4,128,1,float16,float16,16383,0.049620801210403444
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,4,128,1,float16,fp8,16383,0.0400303989648819
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,8,4,128,1,float16,float16,32767,0.07179520130157471
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,1,128,1,float16,float16,1,0.021134400367736818
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,1,128,1,float16,fp8,1,0.02173279970884323
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,1,128,1,float16,float16,3,0.021030400693416596
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,1,128,1,float16,fp8,31,0.021665599942207337
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,1,128,1,float16,fp8,3,0.022075200080871583
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,1,128,1,float16,fp8,63,0.021686400473117828
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,1,128,1,float16,float16,7,0.021063999831676485
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,1,128,1,float16,fp8,7,0.022044800221920013
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,1,128,1,float16,float16,15,0.021206399798393248
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,1,128,1,float16,fp8,15,0.021886399388313292
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,1,128,1,float16,float16,31,0.021155199408531188
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,1,128,1,float16,float16,63,0.021238400042057036
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,1,128,1,float16,float16,127,0.02126079946756363
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,1,128,1,float16,fp8,127,0.02176000028848648
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,1,128,1,float16,float16,255,0.022806400060653688
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,1,128,1,float16,fp8,255,0.02426239997148514
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,1,128,1,float16,float16,511,0.025969600677490233
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,1,128,1,float16,fp8,511,0.02775680124759674
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,1,128,1,float16,float16,1023,0.026875200867652892
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,1,128,1,float16,fp8,1023,0.027483201026916503
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,1,128,1,float16,float16,2047,0.030025601387023926
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,1,128,1,float16,fp8,2047,0.030726400017738343
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,1,128,1,float16,float16,4095,0.050409597158432004
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,1,128,1,float16,fp8,4095,0.04262079894542694
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,1,128,1,float16,float16,8191,0.07175999879837036
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,1,128,1,float16,fp8,8191,0.05692160129547119
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,1,128,1,float16,float16,16383,0.11579200029373168
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,1,128,1,float16,fp8,16383,0.08291680216789246
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,1,128,1,float16,float16,32767,0.20156641006469728
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,2,128,1,float16,float16,1,0.021140800416469575
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,1,128,1,float16,fp8,32767,0.13503680229187012
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,2,128,1,float16,float16,15,0.021324799954891206
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,2,128,1,float16,fp8,15,0.021929599344730377
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,2,128,1,float16,fp8,1,0.022036799788475038
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,2,128,1,float16,float16,31,0.0213359996676445
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,2,128,1,float16,float16,3,0.021400000154972076
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,2,128,1,float16,fp8,3,0.022124800086021423
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,2,128,1,float16,float16,7,0.02131839990615845
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,2,128,1,float16,fp8,7,0.022009600698947907
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,2,128,1,float16,fp8,31,0.02199999988079071
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,2,128,1,float16,float16,63,0.021092799305915833
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,2,128,1,float16,fp8,63,0.022336000204086305
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,2,128,1,float16,float16,127,0.021411199867725373
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,2,128,1,float16,fp8,127,0.02245119959115982
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,2,128,1,float16,float16,255,0.023206399381160737
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,2,128,1,float16,fp8,255,0.024281600117683412
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,2,128,1,float16,float16,511,0.026414400339126586
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,2,128,1,float16,fp8,511,0.027644801139831542
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,2,128,1,float16,float16,1023,0.029873600602149962
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,2,128,1,float16,fp8,1023,0.031201601028442383
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,2,128,1,float16,float16,2047,0.0499343991279602
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,2,128,1,float16,fp8,2047,0.04100799858570099
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,2,128,1,float16,float16,4095,0.07247999906539918
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,2,128,1,float16,fp8,4095,0.058027201890945436
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,2,128,1,float16,float16,8191,0.11706559658050537
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,2,128,1,float16,fp8,8191,0.08187680244445801
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,2,128,1,float16,float16,16383,0.20186240673065187
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,2,128,1,float16,fp8,16383,0.1338655948638916
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,4,128,1,float16,float16,1,0.021692800521850585
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,4,128,1,float16,fp8,1,0.02226720005273819
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,2,128,1,float16,float16,32767,0.3764672040939331
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,2,128,1,float16,fp8,32767,0.2350895881652832
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,4,128,1,float16,float16,3,0.021503999829292297
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,4,128,1,float16,fp8,3,0.02231999933719635
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,4,128,1,float16,float16,7,0.02133280038833618
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,4,128,1,float16,fp8,63,0.0224031999707222
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,4,128,1,float16,fp8,7,0.02263039946556091
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,4,128,1,float16,float16,15,0.021635200083255767
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,4,128,1,float16,fp8,15,0.02242559939622879
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,4,128,1,float16,float16,31,0.021678400039672852
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,4,128,1,float16,fp8,31,0.022387200593948366
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,4,128,1,float16,float16,63,0.02147040069103241
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,4,128,1,float16,float16,127,0.021830399334430695
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,4,128,1,float16,fp8,127,0.022623999416828154
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,4,128,1,float16,float16,255,0.0235615998506546
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,4,128,1,float16,fp8,255,0.024670399725437164
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,4,128,1,float16,float16,511,0.026704001426696777
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,4,128,1,float16,fp8,511,0.0283376008272171
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,4,128,1,float16,float16,1023,0.048372799158096315
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,4,128,1,float16,fp8,1023,0.036099201440811156
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,4,128,1,float16,float16,2047,0.07119200229644776
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,4,128,1,float16,fp8,2047,0.05639839768409729
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,4,128,1,float16,float16,4095,0.11476160287857055
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,4,128,1,float16,fp8,4095,0.08222079873085023
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,4,128,1,float16,float16,8191,0.199726402759552
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,4,128,1,float16,fp8,8191,0.13337119817733764
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,4,128,1,float16,float16,16383,0.37296481132507325
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,4,128,1,float16,fp8,16383,0.2300352096557617
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,1,128,1,float16,float16,1,0.019991999864578246
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,1,128,1,float16,fp8,1,0.020744000375270844
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,1,128,1,float16,float16,3,0.02012320011854172
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,1,128,1,float16,fp8,3,0.020750400424003602
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,4,128,1,float16,fp8,32767,0.42918081283569337
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,8,4,128,1,float16,float16,32767,0.7196688175201416
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,1,128,1,float16,float16,7,0.019947199523448943
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,1,128,1,float16,fp8,7,0.02093919962644577
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,1,128,1,float16,float16,15,0.02003040015697479
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,1,128,1,float16,fp8,15,0.0209647998213768
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,1,128,1,float16,float16,31,0.019823999702930452
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,1,128,1,float16,fp8,31,0.02101760059595108
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,1,128,1,float16,float16,63,0.02011200040578842
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,1,128,1,float16,fp8,63,0.021031999588012697
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,1,128,1,float16,float16,127,0.020047999918460846
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,1,128,1,float16,fp8,127,0.020777599513530733
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,1,128,1,float16,float16,255,0.021825599670410156
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,1,128,1,float16,fp8,255,0.022862400114536285
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,1,128,1,float16,float16,511,0.02452320009469986
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,1,128,1,float16,fp8,511,0.026804798841476442
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,1,128,1,float16,float16,1023,0.024510399997234346
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,1,128,1,float16,fp8,1023,0.02553119957447052
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,1,128,1,float16,float16,2047,0.02458080053329468
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,1,128,1,float16,fp8,2047,0.026256000995635985
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,1,128,1,float16,float16,4095,0.025497600436210632
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,1,128,1,float16,fp8,4095,0.02640640139579773
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,1,128,1,float16,float16,32767,0.049983999133110045
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,1,128,1,float16,float16,8191,0.027616000175476073
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,1,128,1,float16,fp8,8191,0.028255999088287354
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,1,128,1,float16,float16,16383,0.03126719892024994
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,1,128,1,float16,fp8,16383,0.03264159858226776
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,1,128,1,float16,fp8,32767,0.03890079855918884
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,2,128,1,float16,float16,1,0.019977599382400513
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,2,128,1,float16,fp8,1,0.02114560008049011
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,2,128,1,float16,float16,3,0.020127999782562255
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,2,128,1,float16,fp8,3,0.02111999988555908
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,2,128,1,float16,float16,7,0.01998240053653717
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,2,128,1,float16,fp8,7,0.02083359956741333
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,2,128,1,float16,float16,15,0.019963200390338897
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,2,128,1,float16,fp8,15,0.0208079993724823
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,2,128,1,float16,float16,31,0.02011680006980896
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,2,128,1,float16,fp8,31,0.020798400044441223
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,2,128,1,float16,float16,63,0.020339199900627138
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,2,128,1,float16,fp8,63,0.02088160067796707
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,2,128,1,float16,float16,127,0.02020000070333481
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,2,128,1,float16,fp8,127,0.020788800716400147
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,2,128,1,float16,float16,255,0.02157599925994873
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,2,128,1,float16,fp8,255,0.02292319983243942
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,2,128,1,float16,float16,511,0.024875199794769286
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,2,128,1,float16,fp8,511,0.026641601324081422
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,2,128,1,float16,float16,1023,0.024711999297142028
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,2,128,1,float16,fp8,1023,0.025820800662040712
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,2,128,1,float16,float16,2047,0.025209599733352663
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,2,128,1,float16,fp8,2047,0.02614719867706299
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,2,128,1,float16,float16,4095,0.02672800123691559
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,2,128,1,float16,fp8,4095,0.0274944007396698
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,2,128,1,float16,float16,8191,0.030622398853302
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,2,128,1,float16,fp8,8191,0.031164801120758055
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,2,128,1,float16,float16,16383,0.04950560033321381
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,2,128,1,float16,fp8,16383,0.036827200651168825
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,2,128,1,float16,float16,32767,0.07242559790611267
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,2,128,1,float16,fp8,32767,0.05759040117263794
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,4,128,1,float16,float16,1,0.020136000216007234
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,4,128,1,float16,fp8,1,0.021209600567817687
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,4,128,1,float16,float16,3,0.020267200469970704
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,4,128,1,float16,fp8,3,0.021057599782943727
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,4,128,1,float16,float16,7,0.020553599298000335
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,4,128,1,float16,fp8,7,0.02113119959831238
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,4,128,1,float16,float16,15,0.020193600654602052
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,4,128,1,float16,fp8,15,0.02099519968032837
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,4,128,1,float16,float16,31,0.02029760032892227
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,4,128,1,float16,fp8,31,0.021185599267482758
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,4,128,1,float16,float16,63,0.020275199413299562
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,4,128,1,float16,fp8,63,0.021014399826526642
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,4,128,1,float16,float16,127,0.02051520049571991
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,4,128,1,float16,fp8,127,0.021240000426769257
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,4,128,1,float16,float16,255,0.022064000368118286
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,4,128,1,float16,fp8,255,0.0230880007147789
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,4,128,1,float16,float16,511,0.02476000040769577
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,4,128,1,float16,fp8,511,0.026897600293159483
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,4,128,1,float16,float16,1023,0.024748800694942473
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,4,128,1,float16,fp8,1023,0.026187199354171752
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,4,128,1,float16,float16,2047,0.026948800683021544
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,4,128,1,float16,float16,16383,0.07094879746437073
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,4,128,1,float16,fp8,2047,0.02691679894924164
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,4,128,1,float16,float16,4095,0.03070879876613617
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,4,128,1,float16,fp8,4095,0.030559998750686646
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,4,128,1,float16,float16,8191,0.04872319996356964
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,4,128,1,float16,fp8,8191,0.03848159909248352
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,4,128,1,float16,fp8,16383,0.05681920051574707
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,4,128,1,float16,float16,32767,0.11520320177078247
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,1,128,1,float16,float16,1,0.023588800430297853
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,8,4,128,1,float16,fp8,32767,0.08110560178756714
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,1,128,1,float16,fp8,1,0.024852800369262695
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,1,128,1,float16,float16,3,0.023795199394226075
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,1,128,1,float16,fp8,3,0.02444480061531067
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,1,128,1,float16,float16,7,0.02390879988670349
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,1,128,1,float16,fp8,7,0.024376000463962554
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,1,128,1,float16,float16,15,0.02380319982767105
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,1,128,1,float16,fp8,15,0.024718399345874786
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,1,128,1,float16,float16,31,0.02380799949169159
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,1,128,1,float16,fp8,31,0.024430400133132933
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,1,128,1,float16,float16,63,0.02377759963274002
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,1,128,1,float16,fp8,63,0.024732799828052522
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,1,128,1,float16,float16,127,0.023838399350643157
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,1,128,1,float16,fp8,127,0.024988800287246704
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,1,128,1,float16,float16,255,0.025964799523353576
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,1,128,1,float16,fp8,255,0.02670240104198456
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,1,128,1,float16,float16,511,0.03012799918651581
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,1,128,1,float16,fp8,511,0.03030720055103302
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,1,128,1,float16,float16,1023,0.05132319927215576
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,1,128,1,float16,fp8,1023,0.03984160125255585
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,1,128,1,float16,float16,2047,0.07284319996833802
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,1,128,1,float16,fp8,2047,0.058406400680541995
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,1,128,1,float16,float16,4095,0.1163599967956543
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,1,128,1,float16,fp8,4095,0.08312320113182067
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,1,128,1,float16,float16,8191,0.20311360359191893
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,1,128,1,float16,fp8,8191,0.13394080400466918
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,2,128,1,float16,float16,1,0.027396801114082336
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,2,128,1,float16,fp8,1,0.02895039916038513
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,1,128,1,float16,float16,16383,0.377512001991272
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,1,128,1,float16,fp8,16383,0.23624160289764404
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,2,128,1,float16,float16,3,0.02722879946231842
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,2,128,1,float16,fp8,3,0.029129600524902342
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,2,128,1,float16,float16,7,0.027559998631477355
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,2,128,1,float16,fp8,7,0.028881600499153136
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,2,128,1,float16,float16,15,0.027393600344657897
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,2,128,1,float16,fp8,15,0.028992000222206115
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,2,128,1,float16,float16,31,0.027585598826408386
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,2,128,1,float16,fp8,31,0.028908801078796387
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,2,128,1,float16,float16,63,0.027612799406051637
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,2,128,1,float16,fp8,63,0.029232001304626463
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,2,128,1,float16,float16,127,0.027596798539161683
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,2,128,1,float16,fp8,127,0.029174399375915528
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,2,128,1,float16,float16,255,0.03289119899272919
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,2,128,1,float16,fp8,255,0.033211201429367065
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,2,128,1,float16,float16,511,0.050312000513076785
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,2,128,1,float16,fp8,511,0.04450240135192871
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,2,128,1,float16,float16,1023,0.07636799812316894
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,2,128,1,float16,fp8,1023,0.06302239894866943
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,2,128,1,float16,float16,2047,0.11945919990539551
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,2,128,1,float16,fp8,2047,0.08731039762496948
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,2,128,1,float16,float16,4095,0.20467839241027833
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,2,128,1,float16,fp8,4095,0.13971359729766847
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,2,128,1,float16,float16,8191,0.3781856060028076
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,2,128,1,float16,fp8,8191,0.2404576063156128
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,4,128,1,float16,float16,1,0.03501920104026794
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,4,128,1,float16,fp8,1,0.03781439960002899
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,4,128,1,float16,float16,3,0.035011199116706845
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,4,128,1,float16,fp8,3,0.03816959857940674
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,4,128,1,float16,float16,7,0.03471840023994446
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,2,128,1,float16,fp8,16383,0.4382175922393799
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,2,128,1,float16,float16,16383,0.720854377746582
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,4,128,1,float16,fp8,7,0.038166400790214536
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,4,128,1,float16,fp8,63,0.03827039897441864
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,4,128,1,float16,float16,15,0.03511039912700653
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,4,128,1,float16,fp8,15,0.037982401251792905
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,4,128,1,float16,float16,31,0.03503519892692566
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,4,128,1,float16,fp8,31,0.03818399906158447
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,4,128,1,float16,float16,63,0.034971201419830324
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,4,128,1,float16,float16,127,0.037411201000213626
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,4,128,1,float16,fp8,127,0.038264000415802
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,4,128,1,float16,float16,255,0.05574880242347717
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,4,128,1,float16,fp8,255,0.049958398938179015
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,4,128,1,float16,float16,511,0.07667679786682129
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,4,128,1,float16,fp8,511,0.06775839924812317
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,4,128,1,float16,float16,1023,0.12264000177383423
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,4,128,1,float16,fp8,1023,0.09582080245018006
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,4,128,1,float16,float16,2047,0.2074336051940918
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,4,128,1,float16,fp8,2047,0.1454767942428589
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,4,128,1,float16,float16,4095,0.3781183958053589
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,4,128,1,float16,fp8,4095,0.24519200325012208
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,1,128,1,float16,float16,1,0.029976001381874083
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,1,128,1,float16,fp8,1,0.03237279951572418
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,4,128,1,float16,float16,8191,0.7174928188323975
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,4,128,1,float16,fp8,8191,0.4447375774383545
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,1,128,1,float16,float16,3,0.030287998914718627
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,1,128,1,float16,float16,15,0.030155199766159057
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,1,128,1,float16,fp8,3,0.031998398900032046
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,1,128,1,float16,float16,7,0.030055999755859375
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,1,128,1,float16,fp8,7,0.03218719959259033
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,1,128,1,float16,fp8,15,0.031825599074363706
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,4,128,1,float16,fp8,16383,0.8332256317138672
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,1,128,1,float16,float16,31,0.030164799094200133
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,1,128,1,float16,fp8,31,0.03176800012588501
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,8,4,128,1,float16,float16,16383,1.400551986694336
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,1,128,1,float16,float16,63,0.030377599596977233
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,1,128,1,float16,fp8,63,0.03169440031051636
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,1,128,1,float16,float16,127,0.03047040104866028
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,1,128,1,float16,fp8,127,0.03189919888973236
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,1,128,1,float16,float16,1023,0.08016319870948792
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,1,128,1,float16,float16,255,0.0351936012506485
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,1,128,1,float16,fp8,255,0.03591200113296509
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,1,128,1,float16,float16,511,0.05864319801330566
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,1,128,1,float16,fp8,511,0.04565120041370392
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,1,128,1,float16,fp8,1023,0.06608960032463074
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,1,128,1,float16,float16,2047,0.12310240268707276
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,1,128,1,float16,fp8,2047,0.09021279811859131
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,1,128,1,float16,float16,4095,0.20791199207305908
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,1,128,1,float16,fp8,4095,0.14082560539245606
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,2,128,1,float16,float16,7,0.037545600533485414
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,2,128,1,float16,float16,1,0.0375247985124588
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,2,128,1,float16,fp8,1,0.040622401237487796
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,2,128,1,float16,float16,3,0.037852799892425536
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,2,128,1,float16,fp8,3,0.040596801042556765
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,2,128,1,float16,fp8,7,0.040801599621772766
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,2,128,1,float16,fp8,15,0.04056800007820129
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,2,128,1,float16,float16,15,0.037544000148773196
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,2,128,1,float16,float16,31,0.038017600774765015
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,2,128,1,float16,fp8,31,0.040703999996185306
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,2,128,1,float16,float16,63,0.03791680037975311
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,2,128,1,float16,fp8,63,0.040720000863075256
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,2,128,1,float16,float16,127,0.038945600390434265
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,2,128,1,float16,fp8,127,0.040756800770759584
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,2,128,1,float16,float16,255,0.058790397644042966
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,2,128,1,float16,fp8,255,0.050748801231384276
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,2,128,1,float16,float16,511,0.07991520166397095
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,2,128,1,float16,fp8,511,0.07128159999847412
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,2,128,1,float16,float16,1023,0.12779680490493775
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,2,128,1,float16,fp8,1023,0.09914559721946717
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,2,128,1,float16,float16,2047,0.21103360652923583
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,2,128,1,float16,fp8,2047,0.1473296046257019
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,4,128,1,float16,float16,1,0.052716797590255736
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,2,128,1,float16,float16,4095,0.38508000373840334
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,4,128,1,float16,fp8,1,0.058315199613571164
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,2,128,1,float16,fp8,4095,0.2488624095916748
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,4,128,1,float16,fp8,15,0.058815997838973996
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,4,128,1,float16,float16,3,0.052039998769760135
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,4,128,1,float16,float16,7,0.0519711971282959
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,4,128,1,float16,fp8,3,0.05851680040359497
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,4,128,1,float16,fp8,7,0.05842880010604858
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,4,128,1,float16,float16,15,0.05237119793891907
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,4,128,1,float16,float16,127,0.06169760227203369
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,4,128,1,float16,float16,31,0.052532798051834105
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,4,128,1,float16,fp8,255,0.0821071982383728
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,4,128,1,float16,float16,63,0.05434240102767944
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,4,128,1,float16,fp8,31,0.05856159925460815
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,4,128,1,float16,fp8,63,0.05829439759254455
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,4,128,1,float16,fp8,127,0.06310399770736694
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,4,128,1,float16,float16,255,0.08470559716224671
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,4,128,1,float16,float16,511,0.12895519733428956
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,4,128,1,float16,fp8,511,0.11167360544204712
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,4,128,1,float16,float16,1023,0.21618878841400146
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,4,128,1,float16,fp8,1023,0.16549760103225708
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,4,128,1,float16,float16,2047,0.3848479986190796
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,1,128,1,float16,float16,1,0.04494880139827728
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,1,128,1,float16,fp8,1,0.047572800517082216
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,4,128,1,float16,fp8,2047,0.2596832036972046
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,1,128,1,float16,float16,3,0.04458560049533844
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,1,128,1,float16,fp8,3,0.04796960055828094
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,1,128,1,float16,fp8,15,0.048065599799156186
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,4,128,1,float16,float16,4095,0.7200975894927979
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,1,128,1,float16,float16,7,0.04439519941806793
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,1,128,1,float16,fp8,7,0.04791519939899445
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,1,128,1,float16,float16,15,0.044582399725914004
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,8,4,128,1,float16,fp8,4095,0.45822877883911134
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,1,128,1,float16,float16,31,0.04455040097236633
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,1,128,1,float16,fp8,31,0.047942399978637695
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,1,128,1,float16,float16,63,0.044696000218391416
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,1,128,1,float16,fp8,63,0.04766559898853302
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,1,128,1,float16,float16,127,0.047681599855422974
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,1,128,1,float16,fp8,127,0.04800159931182861
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,1,128,1,float16,float16,255,0.06640480160713196
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,1,128,1,float16,fp8,255,0.058894401788711546
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,1,128,1,float16,float16,2047,0.2180624008178711
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,1,128,1,float16,float16,511,0.08852159976959229
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,1,128,1,float16,fp8,511,0.07891839742660522
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,1,128,1,float16,float16,1023,0.13536640405654907
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,1,128,1,float16,fp8,1023,0.10695840120315551
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,2,128,1,float16,float16,1,0.059171199798583984
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,1,128,1,float16,fp8,2047,0.15641599893569946
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,2,128,1,float16,fp8,1,0.06469600200653076
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,2,128,1,float16,float16,3,0.05891039967536926
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,2,128,1,float16,float16,31,0.05890880227088928
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,2,128,1,float16,fp8,3,0.06504799723625183
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,2,128,1,float16,float16,7,0.058955198526382445
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,2,128,1,float16,fp8,7,0.0647823989391327
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,2,128,1,float16,float16,15,0.05863040089607239
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,2,128,1,float16,fp8,15,0.0650991976261139
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,2,128,1,float16,fp8,31,0.0648256003856659
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,2,128,1,float16,float16,63,0.06155359745025635
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,2,128,1,float16,fp8,63,0.0648256003856659
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,2,128,1,float16,float16,127,0.07011680006980896
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,2,128,1,float16,fp8,127,0.06814399957656861
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,2,128,1,float16,float16,255,0.09400320053100586
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,2,128,1,float16,fp8,255,0.08829759955406188
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,2,128,1,float16,float16,511,0.13636480569839476
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,2,128,1,float16,fp8,511,0.11784640550613404
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,2,128,1,float16,float16,1023,0.22424960136413574
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,2,128,1,float16,fp8,1023,0.17263200283050537
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,2,128,1,float16,float16,2047,0.3911295890808105
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,4,128,1,float16,float16,1,0.08761119842529297
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,4,128,1,float16,fp8,1,0.09985600113868713
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,2,128,1,float16,fp8,2047,0.2675631999969482
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,4,128,1,float16,float16,3,0.08672000169754028
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,4,128,1,float16,fp8,3,0.09967039823532105
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,4,128,1,float16,float16,7,0.08745759725570679
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,4,128,1,float16,fp8,7,0.09990400075912476
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,4,128,1,float16,float16,63,0.0944271981716156
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,4,128,1,float16,float16,15,0.08748639822006225
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,4,128,1,float16,fp8,15,0.09991040229797363
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,4,128,1,float16,float16,31,0.0870688021183014
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,4,128,1,float16,fp8,31,0.09946240186691284
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,4,128,1,float16,fp8,63,0.10407040119171143
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,4,128,1,float16,float16,127,0.10026880502700805
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,4,128,1,float16,fp8,127,0.11197439432144166
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,4,128,1,float16,float16,255,0.1437376022338867
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,4,128,1,float16,fp8,255,0.13937920331954956
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,4,128,1,float16,float16,511,0.2285327911376953
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,4,128,1,float16,fp8,511,0.19785120487213134
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,4,128,1,float16,float16,1023,0.3934463977813721
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,4,128,1,float16,fp8,1023,0.3025216102600098
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,1,128,1,float16,float16,1,0.02025440037250519
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,1,128,1,float16,fp8,1,0.020955200493335723
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,1,128,1,float16,float16,3,0.020161600410938264
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,1,128,1,float16,fp8,3,0.02115360051393509
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,4,128,1,float16,float16,2047,0.7348959922790528
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,8,4,128,1,float16,fp8,2047,0.4911327838897705
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,1,128,1,float16,float16,7,0.0203232005238533
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,1,128,1,float16,fp8,7,0.021089600026607515
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,1,128,1,float16,float16,15,0.020257599651813507
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,1,128,1,float16,fp8,15,0.0215488001704216
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,1,128,1,float16,float16,31,0.02033119946718216
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,1,128,1,float16,fp8,31,0.021172800660133363
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,1,128,1,float16,float16,63,0.020214399695396422
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,1,128,1,float16,fp8,63,0.021139200031757354
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,1,128,1,float16,float16,127,0.020270399749279022
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,1,128,1,float16,fp8,127,0.021086399257183076
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,1,128,1,float16,float16,255,0.02187040001153946
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,1,128,1,float16,fp8,255,0.023398399353027344
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,1,128,1,float16,float16,511,0.024857600033283234
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,1,128,1,float16,fp8,511,0.026867198944091796
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,1,128,1,float16,float16,1023,0.024665600061416625
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,1,128,1,float16,fp8,1023,0.02608479857444763
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,1,128,1,float16,float16,2047,0.024864000082015992
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,1,128,1,float16,fp8,2047,0.026422399282455444
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,1,128,1,float16,float16,4095,0.02696000039577484
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,1,128,1,float16,fp8,4095,0.027423998713493346
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,1,128,1,float16,float16,8191,0.030774399638175964
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,1,128,1,float16,fp8,8191,0.031374400854110716
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,1,128,1,float16,float16,16383,0.05002560019493103
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,1,128,1,float16,fp8,16383,0.04180960059165954
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,1,128,1,float16,float16,32767,0.07311040163040161
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,1,128,1,float16,fp8,32767,0.059280002117156984
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,2,128,1,float16,float16,1,0.02022079974412918
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,2,128,1,float16,fp8,1,0.02131199985742569
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,2,128,1,float16,float16,3,0.020398400723934174
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,2,128,1,float16,fp8,3,0.021076799929142
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,2,128,1,float16,float16,7,0.020479999482631683
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,2,128,1,float16,fp8,7,0.02115039974451065
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,2,128,1,float16,float16,15,0.02017440050840378
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,2,128,1,float16,fp8,15,0.02117599993944168
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,2,128,1,float16,float16,31,0.020182399451732634
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,2,128,1,float16,fp8,31,0.021398399770259858
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,2,128,1,float16,float16,63,0.020380799472332
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,2,128,1,float16,fp8,63,0.02118239998817444
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,2,128,1,float16,float16,127,0.02037599980831146
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,2,128,1,float16,fp8,127,0.021167999505996703
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,2,128,1,float16,float16,255,0.021996800601482392
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,2,128,1,float16,fp8,255,0.0230880007147789
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,2,128,1,float16,float16,511,0.024902400374412537
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,2,128,1,float16,fp8,511,0.027433601021766663
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,2,128,1,float16,float16,1023,0.024835200607776643
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,2,128,1,float16,fp8,1023,0.02605760097503662
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,2,128,1,float16,float16,2047,0.026844799518585205
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,2,128,1,float16,fp8,2047,0.027030399441719054
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,2,128,1,float16,fp8,16383,0.05667999982833862
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,2,128,1,float16,float16,4095,0.03030880093574524
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,2,128,1,float16,float16,32767,0.11597119569778443
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,4,128,1,float16,float16,1,0.0206496000289917
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,2,128,1,float16,fp8,4095,0.030744001269340515
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,2,128,1,float16,float16,8191,0.0488864004611969
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,2,128,1,float16,fp8,8191,0.037905600666999814
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,2,128,1,float16,float16,16383,0.07081599831581116
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,2,128,1,float16,fp8,32767,0.08200479745864868
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,4,128,1,float16,fp8,1,0.021347199380397797
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,4,128,1,float16,float16,3,0.0205487996339798
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,4,128,1,float16,fp8,3,0.02130240052938461
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,4,128,1,float16,float16,7,0.02044160068035126
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,4,128,1,float16,fp8,7,0.02110240012407303
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,4,128,1,float16,float16,15,0.020732800662517547
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,4,128,1,float16,fp8,15,0.021435199677944182
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,4,128,1,float16,float16,31,0.020628799498081208
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,4,128,1,float16,fp8,31,0.021447999775409697
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,4,128,1,float16,float16,63,0.020635199546813966
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,4,128,1,float16,fp8,63,0.021492800116539
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,4,128,1,float16,float16,127,0.020635199546813966
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,4,128,1,float16,fp8,127,0.021206399798393248
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,4,128,1,float16,float16,255,0.022495999932289124
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,4,128,1,float16,fp8,255,0.023310400545597076
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,4,128,1,float16,float16,511,0.02540319859981537
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,4,128,1,float16,fp8,511,0.02689119875431061
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,4,128,1,float16,float16,1023,0.02651360034942627
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,4,128,1,float16,fp8,1023,0.026633599400520326
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,4,128,1,float16,float16,2047,0.029532799124717714
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,4,128,1,float16,fp8,8191,0.056827199459075925
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,4,128,1,float16,fp8,2047,0.030671998858451843
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,4,128,1,float16,float16,4095,0.048875200748443606
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,4,128,1,float16,fp8,4095,0.03945119976997376
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,4,128,1,float16,float16,8191,0.0707647979259491
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,4,128,1,float16,float16,16383,0.11522400379180908
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,4,128,1,float16,fp8,16383,0.08105279803276062
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,1,128,1,float16,float16,1,0.0662015974521637
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,4,128,1,float16,float16,32767,0.20154879093170167
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,8,4,128,1,float16,fp8,32767,0.13233439922332763
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,1,128,1,float16,fp8,1,0.07231199741363525
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,1,128,1,float16,float16,15,0.06629440188407898
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,1,128,1,float16,float16,3,0.06625760197639466
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,1,128,1,float16,fp8,3,0.07192639708518982
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,1,128,1,float16,float16,7,0.06618880033493042
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,1,128,1,float16,fp8,7,0.07204319834709168
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,1,128,1,float16,fp8,15,0.07215359807014465
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,1,128,1,float16,float16,31,0.06639360189437866
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,1,128,1,float16,fp8,31,0.07222239971160889
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,1,128,1,float16,float16,63,0.07220159769058228
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,1,128,1,float16,fp8,63,0.07250720262527466
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,1,128,1,float16,float16,127,0.07779200077056884
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,1,128,1,float16,fp8,127,0.0779807984828949
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,1,128,1,float16,float16,255,0.10020639896392822
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,1,128,1,float16,fp8,255,0.09570879936218261
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,1,128,1,float16,float16,511,0.14140000343322753
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,2,128,1,float16,fp8,1,0.1056175947189331
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,1,128,1,float16,fp8,511,0.12768640518188476
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,1,128,1,float16,float16,1023,0.22928318977355958
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,2,128,1,float16,float16,1,0.09452959895133972
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,1,128,1,float16,fp8,1023,0.17952640056610109
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,2,128,1,float16,float16,3,0.09470400214195251
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,2,128,1,float16,fp8,3,0.10603200197219849
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,2,128,1,float16,float16,7,0.09355040192604065
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,2,128,1,float16,float16,63,0.10292479991912842
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,2,128,1,float16,fp8,7,0.10618079900741577
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,2,128,1,float16,float16,15,0.09468160271644592
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,2,128,1,float16,fp8,15,0.10672639608383179
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,2,128,1,float16,float16,31,0.09594720005989074
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,2,128,1,float16,fp8,31,0.10600639581680298
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,2,128,1,float16,fp8,63,0.11375199556350708
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,2,128,1,float16,float16,127,0.10894880294799805
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,2,128,1,float16,fp8,127,0.11926239728927612
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,2,128,1,float16,float16,255,0.15174880027770996
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,2,128,1,float16,fp8,255,0.14500000476837158
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,2,128,1,float16,float16,511,0.2331023931503296
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,2,128,1,float16,fp8,511,0.2085792064666748
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,4,128,1,float16,float16,1,0.149344003200531
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,2,128,1,float16,float16,1023,0.39751520156860354
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,2,128,1,float16,fp8,1023,0.30765280723571775
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,4,128,1,float16,fp8,1,0.1748800039291382
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,4,128,1,float16,float16,3,0.1506351947784424
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,4,128,1,float16,fp8,3,0.17457760572433473
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,4,128,1,float16,float16,7,0.15051039457321166
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,4,128,1,float16,fp8,7,0.17454080581665038
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,4,128,1,float16,float16,15,0.1492975950241089
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,4,128,1,float16,fp8,15,0.1736863970756531
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,4,128,1,float16,float16,31,0.15821599960327148
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,4,128,1,float16,fp8,31,0.17598240375518798
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,4,128,1,float16,float16,255,0.2394239902496338
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,4,128,1,float16,float16,63,0.16021280288696288
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,4,128,1,float16,fp8,63,0.18801120519638062
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,4,128,1,float16,float16,127,0.1685328006744385
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,4,128,1,float16,fp8,127,0.19246560335159302
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,4,128,1,float16,fp8,255,0.2445904016494751
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,4,128,1,float16,float16,511,0.41826558113098145
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,1,128,1,float16,float16,1,0.10614559650421143
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,4,128,1,float16,fp8,511,0.367574405670166
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,1,128,1,float16,fp8,1,0.11777280569076538
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,1,128,1,float16,float16,3,0.10580320358276367
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,1,128,1,float16,float16,15,0.10637919902801514
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,1,128,1,float16,fp8,3,0.11697920560836791
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,4,128,1,float16,float16,1023,0.7386879920959473
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,8,4,128,1,float16,fp8,1023,0.5657296180725098
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,1,128,1,float16,float16,7,0.10552959442138672
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,1,128,1,float16,fp8,7,0.11772480010986328
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,1,128,1,float16,fp8,15,0.11803840398788452
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,1,128,1,float16,fp8,127,0.13081439733505248
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,1,128,1,float16,float16,31,0.10590879917144776
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,1,128,1,float16,fp8,31,0.11789439916610718
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,1,128,1,float16,fp8,255,0.1574447989463806
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,1,128,1,float16,float16,63,0.11657600402832032
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,1,128,1,float16,fp8,63,0.1285199999809265
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,1,128,1,float16,float16,127,0.1271391987800598
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,1,128,1,float16,float16,255,0.16073280572891235
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,1,128,1,float16,float16,511,0.23431839942932128
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,2,128,1,float16,fp8,3,0.1820736050605774
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,1,128,1,float16,fp8,511,0.22037920951843262
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,2,128,1,float16,float16,1,0.16117119789123535
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,2,128,1,float16,fp8,1,0.18358240127563477
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,2,128,1,float16,float16,3,0.1598736047744751
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,2,128,1,float16,float16,7,0.16104320287704468
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,2,128,1,float16,fp8,7,0.1838528037071228
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,2,128,1,float16,float16,15,0.1613055944442749
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,2,128,1,float16,fp8,15,0.1824463963508606
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,2,128,1,float16,float16,31,0.1680559992790222
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,2,128,1,float16,fp8,31,0.18451199531555176
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,2,128,1,float16,fp8,127,0.20377600193023682
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,2,128,1,float16,float16,63,0.17197279930114745
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,2,128,1,float16,fp8,63,0.19799360036849975
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,2,128,1,float16,float16,127,0.18179999589920043
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,2,128,1,float16,float16,255,0.2560096025466919
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,2,128,1,float16,fp8,255,0.2553423881530762
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,2,128,1,float16,float16,511,0.41783838272094725
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,4,128,1,float16,float16,1,0.27407519817352294
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,2,128,1,float16,fp8,511,0.3800575971603394
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,4,128,1,float16,fp8,1,0.3199376106262207
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,4,128,1,float16,float16,3,0.2758143901824951
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,4,128,1,float16,fp8,3,0.3196752071380615
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,4,128,1,float16,float16,7,0.2764319896697998
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,4,128,1,float16,fp8,7,0.3229968070983887
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,4,128,1,float16,float16,15,0.27748799324035645
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,4,128,1,float16,fp8,15,0.32357120513916016
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,4,128,1,float16,float16,31,0.2845312118530273
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,4,128,1,float16,fp8,31,0.34251999855041504
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,4,128,1,float16,float16,63,0.2878976106643677
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,4,128,1,float16,fp8,63,0.3481584072113037
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,4,128,1,float16,float16,127,0.3046976089477539
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,4,128,1,float16,fp8,127,0.3503504037857056
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,4,128,1,float16,float16,255,0.43647041320800783
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,1,128,1,float16,float16,1,0.020372800529003143
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,4,128,1,float16,fp8,255,0.45239839553833006
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,1,128,1,float16,fp8,1,0.021132799983024596
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,1,128,1,float16,float16,3,0.020417599380016326
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,1,128,1,float16,fp8,3,0.021342399716377258
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,1,128,1,float16,float16,7,0.02016959935426712
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,4,128,1,float16,float16,511,0.799287986755371
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,8,4,128,1,float16,fp8,511,0.6869311809539795
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,1,128,1,float16,fp8,7,0.02093279957771301
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,1,128,1,float16,float16,15,0.020289599895477295
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,1,128,1,float16,fp8,15,0.021073600649833678
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,1,128,1,float16,float16,255,0.02211360037326813
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,1,128,1,float16,float16,31,0.0204927995800972
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,1,128,1,float16,fp8,31,0.021121600270271303
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,1,128,1,float16,float16,63,0.020436799526214598
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,1,128,1,float16,fp8,63,0.020974400639533996
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,1,128,1,float16,float16,127,0.020420800149440765
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,1,128,1,float16,fp8,127,0.020927999913692475
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,1,128,1,float16,fp8,255,0.023107199370861052
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,1,128,1,float16,float16,511,0.025033599138259886
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,1,128,1,float16,fp8,511,0.026822400093078614
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,1,128,1,float16,float16,1023,0.024611200392246246
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,1,128,1,float16,fp8,1023,0.02627519965171814
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,1,128,1,float16,float16,2047,0.026688000559806822
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,1,128,1,float16,fp8,2047,0.02677280008792877
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,1,128,1,float16,float16,4095,0.03320319950580597
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,1,128,1,float16,float16,32767,0.11472480297088623
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,1,128,1,float16,fp8,4095,0.0304639995098114
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,1,128,1,float16,float16,8191,0.04934560060501099
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,1,128,1,float16,fp8,8191,0.03687199950218201
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,1,128,1,float16,float16,16383,0.07116000056266784
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,1,128,1,float16,fp8,16383,0.0569648027420044
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,1,128,1,float16,fp8,32767,0.08292800188064575
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,2,128,1,float16,float16,1,0.020499199628829956
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,2,128,1,float16,fp8,1,0.02102559953927994
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,2,128,1,float16,float16,3,0.02054080069065094
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,2,128,1,float16,fp8,3,0.021247999370098115
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,2,128,1,float16,float16,7,0.020718400180339814
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,2,128,1,float16,fp8,7,0.021592000126838685
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,2,128,1,float16,float16,15,0.0205487996339798
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,2,128,1,float16,fp8,15,0.021695999801158904
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,2,128,1,float16,float16,31,0.02045920044183731
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,2,128,1,float16,fp8,31,0.021187199652194975
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,2,128,1,float16,float16,63,0.0203247994184494
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,2,128,1,float16,fp8,63,0.021377600729465485
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,2,128,1,float16,float16,127,0.020776000618934632
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,2,128,1,float16,fp8,127,0.021750399470329286
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,2,128,1,float16,float16,255,0.022356800734996796
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,2,128,1,float16,fp8,255,0.023848000168800353
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,2,128,1,float16,float16,511,0.025467199087142945
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,2,128,1,float16,fp8,511,0.026899200677871705
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,2,128,1,float16,float16,1023,0.026550400257110595
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,2,128,1,float16,fp8,1023,0.02680160105228424
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,2,128,1,float16,float16,2047,0.030267199873924254
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,2,128,1,float16,fp8,2047,0.03030720055103302
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,2,128,1,float16,float16,4095,0.04938240051269531
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,2,128,1,float16,fp8,4095,0.036724799871444704
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,2,128,1,float16,float16,32767,0.20215039253234862
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,2,128,1,float16,float16,8191,0.07077119946479797
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,2,128,1,float16,fp8,8191,0.056201601028442384
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,2,128,1,float16,float16,16383,0.11548800468444824
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,2,128,1,float16,fp8,16383,0.08248159885406495
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,4,128,1,float16,float16,1,0.02067999988794327
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,2,128,1,float16,fp8,32767,0.13464159965515138
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,4,128,1,float16,fp8,1,0.0217071995139122
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,4,128,1,float16,float16,3,0.020815999805927278
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,4,128,1,float16,fp8,3,0.02144960016012192
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,4,128,1,float16,float16,7,0.020640000700950623
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,4,128,1,float16,fp8,7,0.021830399334430695
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,4,128,1,float16,float16,15,0.020659199357032774
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,4,128,1,float16,fp8,15,0.02168000042438507
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,4,128,1,float16,float16,31,0.02081120014190674
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,4,128,1,float16,fp8,31,0.021835200488567352
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,4,128,1,float16,float16,63,0.020905600488185884
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,4,128,1,float16,fp8,63,0.021704000234603883
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,4,128,1,float16,float16,127,0.020975999534130096
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,4,128,1,float16,fp8,127,0.021550400555133818
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,4,128,1,float16,float16,255,0.022657600045204163
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,4,128,1,float16,fp8,255,0.023747199773788454
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,4,128,1,float16,float16,511,0.02576960027217865
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,4,128,1,float16,fp8,511,0.027489599585533143
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,4,128,1,float16,float16,1023,0.03166239857673645
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,4,128,1,float16,fp8,1023,0.030593600869178773
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,4,128,1,float16,float16,2047,0.0494704008102417
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,4,128,1,float16,fp8,2047,0.04071039855480194
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,4,128,1,float16,float16,4095,0.07390879988670349
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,4,128,1,float16,float16,16383,0.20075678825378418
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,4,128,1,float16,fp8,16383,0.13357280492782592
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,4,128,1,float16,fp8,4095,0.056548798084259035
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,4,128,1,float16,float16,8191,0.11498719453811646
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,4,128,1,float16,fp8,8191,0.08157119750976563
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,1,128,1,float16,float16,1,0.021848000586032867
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,1,128,1,float16,fp8,1,0.02282080054283142
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,4,128,1,float16,float16,32767,0.37414720058441164
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,1,128,1,float16,float16,3,0.021792000532150267
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,8,4,128,1,float16,fp8,32767,0.2323296070098877
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,1,128,1,float16,fp8,3,0.022728000581264497
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,1,128,1,float16,float16,7,0.021667200326919555
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,1,128,1,float16,fp8,7,0.022683200240135194
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,1,128,1,float16,float16,15,0.021996800601482392
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,1,128,1,float16,fp8,15,0.022776000201702118
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,1,128,1,float16,float16,31,0.021955199539661407
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,1,128,1,float16,float16,63,0.022096000611782074
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,1,128,1,float16,fp8,31,0.023025600612163542
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,1,128,1,float16,fp8,63,0.02282080054283142
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,1,128,1,float16,float16,127,0.02215359956026077
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,1,128,1,float16,fp8,127,0.02306240051984787
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,1,128,1,float16,float16,255,0.023972800374031066
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,1,128,1,float16,fp8,255,0.024721600115299225
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,1,128,1,float16,float16,511,0.026723200082778932
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,1,128,1,float16,fp8,511,0.028398400545120238
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,1,128,1,float16,float16,1023,0.03442400097846985
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,1,128,1,float16,fp8,1023,0.03247359991073608
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,1,128,1,float16,float16,2047,0.051526397466659546
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,1,128,1,float16,fp8,2047,0.04214400053024292
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,1,128,1,float16,float16,4095,0.07337440252304077
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,1,128,1,float16,fp8,4095,0.05880640149116516
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,1,128,1,float16,float16,8191,0.11912640333175659
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,1,128,1,float16,fp8,8191,0.08545759916305543
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,1,128,1,float16,float16,16383,0.20937600135803222
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,1,128,1,float16,fp8,16383,0.13976800441741943
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,1,128,1,float16,float16,32767,0.3850895881652832
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,1,128,1,float16,fp8,32767,0.24050719738006593
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,2,128,1,float16,float16,1,0.02223840057849884
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,2,128,1,float16,fp8,1,0.023183999955654143
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,2,128,1,float16,float16,3,0.02221280038356781
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,2,128,1,float16,fp8,3,0.022969600558280946
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,1,128,1,float16,fp8,65535,0.4425759792327881
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,2,128,1,float16,float16,31,0.022200000286102296
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,1,128,1,float16,float16,65535,0.7312464237213134
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,2,128,1,float16,float16,63,0.022467200458049775
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,2,128,1,float16,float16,7,0.022256000339984892
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,2,128,1,float16,fp8,7,0.023025600612163542
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,2,128,1,float16,float16,15,0.022118400037288665
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,2,128,1,float16,fp8,15,0.023137600719928743
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,2,128,1,float16,fp8,31,0.023124800622463228
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,2,128,1,float16,fp8,63,0.023070399463176728
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,2,128,1,float16,fp8,127,0.02324959933757782
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,2,128,1,float16,float16,127,0.022177599370479584
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,2,128,1,float16,float16,255,0.024460799992084503
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,2,128,1,float16,fp8,255,0.02524479925632477
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,2,128,1,float16,float16,511,0.027292799949645997
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,2,128,1,float16,fp8,511,0.029123198986053467
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,2,128,1,float16,float16,1023,0.049700799584388736
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,2,128,1,float16,fp8,1023,0.04122079908847809
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,2,128,1,float16,float16,2047,0.07135519981384278
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,2,128,1,float16,fp8,2047,0.056664001941680905
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,2,128,1,float16,float16,4095,0.11436480283737183
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,2,128,1,float16,fp8,4095,0.08178719878196716
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,2,128,1,float16,float16,8191,0.2016160011291504
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,2,128,1,float16,fp8,8191,0.13406399488449097
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,2,128,1,float16,float16,16383,0.37526400089263917
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,2,128,1,float16,fp8,16383,0.23465440273284913
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,2,128,1,float16,fp8,32767,0.43187198638916013
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,2,128,1,float16,float16,32767,0.7207968235015869
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,1,128,1,float16,float16,1,0.017134399712085725
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,1,128,1,float16,fp8,1,0.018385599553585052
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,1,128,1,float16,float16,3,0.017524799704551695
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,1,128,1,float16,fp8,3,0.018376000225543976
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,1,128,1,float16,float16,7,0.017291200160980225
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,2,128,1,float16,fp8,65535,0.8224896430969239
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,1,128,1,float16,float16,63,0.0172447994351387
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,1,128,1,float16,float16,15,0.017243200540542604
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,1,128,1,float16,fp8,7,0.01860959976911545
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,1,128,1,float16,fp8,15,0.018376000225543976
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,4,2,128,1,float16,float16,65535,1.4178175926208496
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,1,128,1,float16,float16,31,0.017574399709701538
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,1,128,1,float16,fp8,31,0.01841440051794052
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,1,128,1,float16,float16,127,0.017339199781417847
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,1,128,1,float16,fp8,63,0.018347199261188506
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,1,128,1,float16,fp8,127,0.01812320053577423
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,1,128,1,float16,float16,255,0.01908160001039505
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,1,128,1,float16,fp8,255,0.020454399287700653
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,1,128,1,float16,float16,511,0.02194560021162033
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,1,128,1,float16,fp8,511,0.02425280064344406
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,1,128,1,float16,float16,1023,0.022625599801540375
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,1,128,1,float16,fp8,1023,0.02385919988155365
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,1,128,1,float16,float16,2047,0.022459200024604796
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,1,128,1,float16,fp8,2047,0.024003200232982635
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,1,128,1,float16,float16,4095,0.022644799947738648
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,1,128,1,float16,fp8,4095,0.024459199607372285
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,1,128,1,float16,float16,8191,0.024732799828052522
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,1,128,1,float16,fp8,8191,0.026097598671913146
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,1,128,1,float16,float16,16383,0.027830401062965394
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,1,128,1,float16,fp8,16383,0.029183998703956604
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,1,128,1,float16,float16,32767,0.02995840013027191
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,1,128,1,float16,fp8,32767,0.03072479963302612
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,1,128,1,float16,float16,65535,0.034822401404380796
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,1,128,1,float16,fp8,65535,0.03483999967575073
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,2,128,1,float16,float16,1,0.018171200156211854
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,2,128,1,float16,fp8,1,0.019097599387168884
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,2,128,1,float16,float16,3,0.017921599745750427
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,2,128,1,float16,fp8,3,0.01910240054130554
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,2,128,1,float16,float16,7,0.018188799917697906
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,2,128,1,float16,fp8,7,0.018937599658966065
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,2,128,1,float16,float16,15,0.018164800107479097
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,2,128,1,float16,fp8,15,0.019089600443840025
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,2,128,1,float16,float16,31,0.01820160001516342
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,2,128,1,float16,fp8,31,0.01884479969739914
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,2,128,1,float16,float16,63,0.018268799781799315
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,2,128,1,float16,fp8,63,0.019043199717998505
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,2,128,1,float16,float16,127,0.018400000035762788
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,2,128,1,float16,fp8,127,0.01899999976158142
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,2,128,1,float16,float16,255,0.019806399941444397
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,2,128,1,float16,fp8,255,0.021060800552368163
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,2,128,1,float16,float16,511,0.02252320051193237
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,2,128,1,float16,fp8,511,0.02502079904079437
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,2,128,1,float16,float16,1023,0.023393599689006804
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,2,128,1,float16,fp8,1023,0.025076800584793092
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,2,128,1,float16,float16,2047,0.023427200317382813
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,2,128,1,float16,fp8,2047,0.024883200228214265
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,2,128,1,float16,float16,4095,0.023705600202083586
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,2,128,1,float16,fp8,4095,0.02521919906139374
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,2,128,1,float16,float16,8191,0.025726398825645445
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,2,128,1,float16,fp8,8191,0.026894399523735048
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,2,128,1,float16,float16,16383,0.02766079902648926
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,2,128,1,float16,fp8,16383,0.028350400924682616
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,2,128,1,float16,float16,32767,0.03142240047454834
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,2,128,1,float16,fp8,32767,0.03187040090560913
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,2,128,1,float16,float16,65535,0.04902240037918091
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,4,2,128,1,float16,fp8,65535,0.039787200093269345
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,1,128,1,float16,float16,1,0.018475200235843658
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,1,128,1,float16,fp8,1,0.019227199256420135
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,1,128,1,float16,float16,3,0.018345600366592406
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,1,128,1,float16,fp8,3,0.019083200395107268
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,1,128,1,float16,float16,7,0.018161599338054658
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,1,128,1,float16,fp8,7,0.01916159987449646
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,1,128,1,float16,float16,127,0.01839679926633835
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,1,128,1,float16,float16,15,0.01812479943037033
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,1,128,1,float16,fp8,15,0.018929600715637207
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,1,128,1,float16,float16,31,0.018371200561523436
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,1,128,1,float16,fp8,31,0.01930239945650101
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,1,128,1,float16,float16,63,0.018377600610256194
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,1,128,1,float16,fp8,63,0.019099199771881105
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,1,128,1,float16,fp8,127,0.01926400065422058
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,1,128,1,float16,float16,255,0.019811199605464937
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,1,128,1,float16,fp8,255,0.021084800362586975
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,1,128,1,float16,float16,511,0.022675199806690215
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,1,128,1,float16,fp8,511,0.025230398774147032
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,1,128,1,float16,float16,1023,0.023614400625228883
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,1,128,1,float16,fp8,1023,0.024883200228214265
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,1,128,1,float16,float16,2047,0.02364960014820099
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,1,128,1,float16,fp8,2047,0.025201600790023804
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,1,128,1,float16,float16,4095,0.02415039986371994
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,1,128,1,float16,fp8,4095,0.025334399938583375
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,1,128,1,float16,float16,8191,0.025625601410865784
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,1,128,1,float16,fp8,8191,0.02710399925708771
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,1,128,1,float16,fp8,65535,0.043875199556350705
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,1,128,1,float16,float16,16383,0.03041119873523712
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,1,128,1,float16,fp8,16383,0.030478399991989136
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,1,128,1,float16,float16,32767,0.03416160047054291
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,1,128,1,float16,fp8,32767,0.03486559987068176
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,1,128,1,float16,float16,65535,0.05139200091361999
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,2,128,1,float16,float16,1,0.019993600249290467
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,2,128,1,float16,fp8,1,0.020902399718761445
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,2,128,1,float16,float16,3,0.020076799392700195
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,2,128,1,float16,fp8,3,0.020899200439453126
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,2,128,1,float16,float16,7,0.02009280025959015
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,2,128,1,float16,fp8,7,0.021172800660133363
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,2,128,1,float16,float16,15,0.020028799772262573
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,2,128,1,float16,fp8,15,0.02089280039072037
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,2,128,1,float16,float16,31,0.01990240067243576
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,2,128,1,float16,fp8,31,0.020900799334049223
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,2,128,1,float16,float16,63,0.01996160000562668
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,2,128,1,float16,fp8,63,0.021004800498485566
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,2,128,1,float16,float16,127,0.020099200308322906
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,2,128,1,float16,fp8,127,0.02099519968032837
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,2,128,1,float16,float16,255,0.02176959961652756
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,2,128,1,float16,fp8,255,0.022966399788856506
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,2,128,1,float16,float16,511,0.02454400062561035
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,2,128,1,float16,fp8,511,0.026862400770187377
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,2,128,1,float16,float16,1023,0.025355198979377748
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,2,128,1,float16,fp8,1023,0.026638400554656983
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,2,128,1,float16,float16,2047,0.025543999671936036
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,2,128,1,float16,fp8,2047,0.027132800221443175
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,2,128,1,float16,float16,4095,0.025798401236534117
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,2,128,1,float16,fp8,4095,0.02709279954433441
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,2,128,1,float16,float16,8191,0.028998398780822755
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,2,128,1,float16,fp8,8191,0.029172798991203307
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,2,128,1,float16,float16,16383,0.032323199510574344
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,2,128,1,float16,fp8,16383,0.033011201024055484
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,2,128,1,float16,float16,32767,0.05059360265731812
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,2,128,1,float16,fp8,32767,0.041643199324607846
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,2,128,1,float16,float16,65535,0.07364320158958435
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,4,2,128,1,float16,fp8,65535,0.059494400024414064
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,1,128,1,float16,float16,1,0.021084800362586975
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,1,128,1,float16,fp8,1,0.021857599914073943
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,1,128,1,float16,float16,3,0.021007999777793884
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,1,128,1,float16,fp8,3,0.02191520035266876
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,1,128,1,float16,float16,7,0.020822399854660036
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,1,128,1,float16,fp8,7,0.02171040028333664
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,1,128,1,float16,float16,15,0.021081599593162536
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,1,128,1,float16,fp8,15,0.021719999611377716
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,1,128,1,float16,float16,31,0.021007999777793884
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,1,128,1,float16,fp8,31,0.021860800683498383
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,1,128,1,float16,float16,63,0.020824000239372253
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,1,128,1,float16,fp8,63,0.021780799329280853
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,1,128,1,float16,float16,127,0.021003200113773345
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,1,128,1,float16,fp8,127,0.021964800357818604
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,1,128,1,float16,float16,255,0.02274720072746277
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,1,128,1,float16,fp8,255,0.023785600066185
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,1,128,1,float16,float16,511,0.025788798928260803
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,1,128,1,float16,fp8,511,0.027427199482917785
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,1,128,1,float16,float16,1023,0.028020799160003662
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,1,128,1,float16,fp8,1023,0.028281599283218384
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,1,128,1,float16,float16,2047,0.03196640014648437
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,1,128,1,float16,fp8,2047,0.03214719891548157
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,1,128,1,float16,float16,4095,0.05075680017471314
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,1,128,1,float16,fp8,4095,0.04447680115699768
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,1,128,1,float16,float16,8191,0.07371199727058411
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,1,128,1,float16,fp8,8191,0.059252798557281494
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,1,128,1,float16,float16,16383,0.12059999704360962
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,1,128,1,float16,fp8,16383,0.08684960007667542
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,1,128,1,float16,float16,32767,0.20800960063934326
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,1,128,1,float16,fp8,32767,0.13972799777984618
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,2,128,1,float16,float16,1,0.021279999613761903
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,1,128,1,float16,float16,65535,0.383404803276062
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,2,128,1,float16,fp8,1,0.021960000693798064
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,1,128,1,float16,fp8,65535,0.2414112091064453
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,2,128,1,float16,fp8,15,0.021929599344730377
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,2,128,1,float16,float16,3,0.021447999775409697
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,2,128,1,float16,fp8,3,0.021784000098705292
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,2,128,1,float16,float16,7,0.02104640007019043
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,2,128,1,float16,fp8,7,0.02197919934988022
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,2,128,1,float16,float16,15,0.021355199813842773
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,2,128,1,float16,float16,31,0.021142399311065672
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,2,128,1,float16,fp8,31,0.02208160012960434
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,2,128,1,float16,float16,63,0.02130720019340515
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,2,128,1,float16,fp8,63,0.02208320051431656
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,2,128,1,float16,float16,127,0.02114560008049011
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,2,128,1,float16,fp8,127,0.022363199293613432
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,2,128,1,float16,float16,255,0.02290239930152893
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,2,128,1,float16,fp8,255,0.02433760017156601
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,2,128,1,float16,float16,511,0.025856000185012818
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,2,128,1,float16,fp8,511,0.027775999903678895
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,2,128,1,float16,float16,1023,0.03224959969520569
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,2,128,1,float16,fp8,1023,0.031908801198005675
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,2,128,1,float16,float16,2047,0.05079360008239746
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,2,128,1,float16,fp8,2047,0.038299199938774106
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,2,128,1,float16,float16,4095,0.07240480184555054
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,2,128,1,float16,fp8,4095,0.058392000198364255
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,2,128,1,float16,float16,8191,0.1183616042137146
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,2,128,1,float16,fp8,8191,0.08585439920425415
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,2,128,1,float16,float16,16383,0.20492000579833985
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,2,128,1,float16,fp8,16383,0.1370144009590149
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,2,128,1,float16,float16,32767,0.3787168025970459
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,2,128,1,float16,fp8,32767,0.23816320896148682
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,1,128,1,float16,float16,1,0.019862399995326997
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,1,128,1,float16,fp8,1,0.020960000157356263
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,1,128,1,float16,float16,3,0.01997919976711273
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,1,128,1,float16,fp8,3,0.020720000565052032
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,2,128,1,float16,float16,65535,0.724836778640747
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,4,2,128,1,float16,fp8,65535,0.4345232009887695
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,1,128,1,float16,float16,7,0.019655999541282655
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,1,128,1,float16,fp8,7,0.020868800580501556
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,1,128,1,float16,float16,15,0.01990240067243576
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,1,128,1,float16,fp8,15,0.020894399285316466
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,1,128,1,float16,float16,31,0.019921599328517912
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,1,128,1,float16,fp8,31,0.02088959962129593
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,1,128,1,float16,float16,63,0.02011519968509674
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,1,128,1,float16,fp8,63,0.020924800634384157
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,1,128,1,float16,float16,127,0.01987839937210083
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,1,128,1,float16,fp8,127,0.021004800498485566
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,1,128,1,float16,float16,255,0.021691200137138367
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,1,128,1,float16,fp8,255,0.023060800135135652
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,1,128,1,float16,float16,511,0.024527999758720397
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,1,128,1,float16,fp8,511,0.026574400067329407
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,1,128,1,float16,float16,1023,0.025116801261901855
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,1,128,1,float16,fp8,1023,0.026767998933792114
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,1,128,1,float16,float16,2047,0.025484800338745117
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,1,128,1,float16,fp8,2047,0.02688480019569397
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,1,128,1,float16,float16,4095,0.025899198651313782
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,1,128,1,float16,fp8,4095,0.027286401391029357
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,1,128,1,float16,float16,8191,0.02892799973487854
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,1,128,1,float16,fp8,8191,0.02898559868335724
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,1,128,1,float16,float16,16383,0.03807680010795593
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,1,128,1,float16,fp8,16383,0.035436800122261046
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,1,128,1,float16,float16,32767,0.053395199775695804
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,1,128,1,float16,fp8,32767,0.04147199988365173
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,1,128,1,float16,float16,65535,0.07672320008277893
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,1,128,1,float16,fp8,65535,0.06306399703025818
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,2,128,1,float16,float16,1,0.020659199357032774
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,2,128,1,float16,fp8,1,0.02144960016012192
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,2,128,1,float16,float16,3,0.019939200580120088
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,2,128,1,float16,fp8,3,0.021115200221538545
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,2,128,1,float16,float16,7,0.020015999674797058
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,2,128,1,float16,fp8,7,0.021111999452114106
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,2,128,1,float16,float16,15,0.02024800032377243
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,2,128,1,float16,fp8,15,0.02101919949054718
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,2,128,1,float16,float16,31,0.020099200308322906
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,2,128,1,float16,fp8,31,0.020596800744533537
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,2,128,1,float16,float16,63,0.02028159946203232
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,2,128,1,float16,fp8,63,0.021108800172805788
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,2,128,1,float16,float16,127,0.02011200040578842
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,2,128,1,float16,fp8,127,0.02096160054206848
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,2,128,1,float16,fp8,1023,0.0270224004983902
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,2,128,1,float16,float16,255,0.02173440009355545
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,2,128,1,float16,fp8,255,0.022801600396633148
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,2,128,1,float16,float16,511,0.024452799558639528
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,2,128,1,float16,fp8,511,0.026657599210739135
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,2,128,1,float16,float16,1023,0.025496000051498414
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,2,128,1,float16,float16,2047,0.02561120092868805
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,2,128,1,float16,fp8,2047,0.027241599559783936
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,2,128,1,float16,float16,4095,0.027399998903274537
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,2,128,1,float16,float16,32767,0.07350720167160034
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,2,128,1,float16,fp8,4095,0.027926400303840637
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,2,128,1,float16,float16,8191,0.031887999176979064
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,2,128,1,float16,fp8,8191,0.03305439949035645
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,2,128,1,float16,float16,16383,0.05094879865646362
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,2,128,1,float16,fp8,16383,0.0387935996055603
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,2,128,1,float16,fp8,32767,0.06032000184059143
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,2,128,1,float16,float16,65535,0.1191472053527832
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,4,2,128,1,float16,fp8,65535,0.08751360177993775
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,1,128,1,float16,float16,1,0.023839999735355378
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,1,128,1,float16,fp8,1,0.024483199417591094
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,1,128,1,float16,float16,3,0.023870399594306944
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,1,128,1,float16,fp8,3,0.0244159996509552
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,1,128,1,float16,float16,7,0.024115200340747833
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,1,128,1,float16,fp8,7,0.024707199633121492
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,1,128,1,float16,float16,15,0.023710399866104126
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,1,128,1,float16,fp8,15,0.024736000597476958
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,1,128,1,float16,float16,31,0.023819200694561005
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,1,128,1,float16,fp8,31,0.024275200068950654
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,1,128,1,float16,float16,63,0.023608000576496126
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,1,128,1,float16,fp8,63,0.024222399294376373
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,1,128,1,float16,float16,127,0.02436479926109314
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,1,128,1,float16,fp8,127,0.024851199984550477
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,1,128,1,float16,float16,255,0.025830399990081788
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,1,128,1,float16,fp8,255,0.02677600085735321
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,1,128,1,float16,float16,511,0.03094240128993988
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,1,128,1,float16,float16,4095,0.11699999570846557
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,1,128,1,float16,fp8,511,0.030827200412750243
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,1,128,1,float16,float16,1023,0.05058240294456482
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,1,128,1,float16,fp8,1023,0.04188160002231598
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,1,128,1,float16,float16,2047,0.07308800220489502
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,1,128,1,float16,fp8,2047,0.057929599285125734
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,1,128,1,float16,fp8,4095,0.08404960036277771
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,1,128,1,float16,float16,8191,0.2040623903274536
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,1,128,1,float16,fp8,8191,0.13588639497756957
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,1,128,1,float16,float16,16383,0.37856159210205076
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,1,128,1,float16,fp8,16383,0.23724160194396973
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,2,128,1,float16,float16,1,0.02746720016002655
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,2,128,1,float16,fp8,1,0.029150399565696716
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,2,128,1,float16,float16,3,0.027235201001167296
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,2,128,1,float16,fp8,3,0.02916640043258667
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,1,128,1,float16,float16,32767,0.7250288009643555
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,1,128,1,float16,fp8,32767,0.4315648078918457
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,2,128,1,float16,float16,7,0.02725920081138611
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,2,128,1,float16,fp8,7,0.02908160090446472
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,2,128,1,float16,float16,15,0.02754879891872406
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,2,128,1,float16,fp8,15,0.029084798693656922
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,2,128,1,float16,float16,31,0.027590399980545043
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,2,128,1,float16,fp8,31,0.028968000411987306
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,2,128,1,float16,float16,63,0.02727839946746826
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,2,128,1,float16,fp8,63,0.029110398888587952
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,2,128,1,float16,float16,127,0.027382400631904603
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,2,128,1,float16,float16,1023,0.07640320062637329
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,2,128,1,float16,fp8,127,0.02895359992980957
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,2,128,1,float16,float16,255,0.033276799321174624
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,2,128,1,float16,fp8,255,0.03320319950580597
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,2,128,1,float16,float16,511,0.050809597969055174
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,2,128,1,float16,fp8,511,0.04370880126953125
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,2,128,1,float16,fp8,1023,0.06312959790229797
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,2,128,1,float16,float16,2047,0.11877119541168213
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,2,128,1,float16,fp8,2047,0.0874127984046936
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,2,128,1,float16,float16,4095,0.20414559841156005
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,2,128,1,float16,fp8,4095,0.1395311951637268
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,2,128,1,float16,float16,8191,0.3779439926147461
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,2,128,1,float16,fp8,8191,0.23894879817962647
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,1,128,1,float16,float16,1,0.03060320019721985
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,1,128,1,float16,fp8,1,0.032304000854492185
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,2,128,1,float16,float16,16383,0.7192671775817872
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,2,128,1,float16,fp8,16383,0.4335504055023193
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,1,128,1,float16,float16,3,0.030454400181770324
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,1,128,1,float16,fp8,7,0.03200159966945648
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,1,128,1,float16,fp8,3,0.032225599884986876
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,1,128,1,float16,float16,7,0.030836799740791322
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,2,128,1,float16,fp8,32767,0.8259296417236328
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,1,128,1,float16,float16,15,0.03044160008430481
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,1,128,1,float16,fp8,15,0.03208160102367401
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,4,2,128,1,float16,float16,32767,1.4097935676574707
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,1,128,1,float16,float16,31,0.030193600058555602
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,1,128,1,float16,fp8,31,0.03233759999275208
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,1,128,1,float16,float16,63,0.03092319965362549
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,1,128,1,float16,fp8,63,0.032120001316070554
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,1,128,1,float16,float16,127,0.03038719892501831
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,1,128,1,float16,fp8,127,0.032393598556518556
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,1,128,1,float16,float16,255,0.03414559960365295
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,1,128,1,float16,fp8,255,0.03574720025062561
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,1,128,1,float16,float16,511,0.053609597682952884
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,1,128,1,float16,fp8,511,0.04721119999885559
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,1,128,1,float16,float16,1023,0.08161280155181885
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,1,128,1,float16,fp8,1023,0.06622400283813476
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,1,128,1,float16,float16,2047,0.12229759693145752
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,1,128,1,float16,fp8,2047,0.09005600214004517
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,1,128,1,float16,float16,4095,0.20683999061584474
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,1,128,1,float16,fp8,4095,0.1429967999458313
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,2,128,1,float16,float16,1,0.03776159882545471
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,1,128,1,float16,float16,8191,0.38085598945617677
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,2,128,1,float16,fp8,1,0.040747201442718504
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,1,128,1,float16,fp8,8191,0.24461278915405274
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,2,128,1,float16,float16,3,0.03739840090274811
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,2,128,1,float16,fp8,3,0.040663999319076535
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,2,128,1,float16,float16,31,0.038115200400352475
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,2,128,1,float16,float16,7,0.03774079978466034
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,2,128,1,float16,fp8,7,0.040822398662567136
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,2,128,1,float16,float16,15,0.038280001282691954
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,2,128,1,float16,fp8,15,0.04065119922161102
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,2,128,1,float16,fp8,31,0.04037440121173859
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,2,128,1,float16,float16,63,0.03784799873828888
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,2,128,1,float16,fp8,63,0.04109599888324737
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,2,128,1,float16,fp8,127,0.041515201330184937
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,2,128,1,float16,float16,127,0.039771199226379395
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,2,128,1,float16,float16,255,0.05869439840316772
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,2,128,1,float16,fp8,255,0.05244160294532776
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,2,128,1,float16,float16,511,0.08022720217704774
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,2,128,1,float16,fp8,511,0.07030079960823059
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,2,128,1,float16,float16,1023,0.12612160444259643
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,2,128,1,float16,fp8,1023,0.09950079917907714
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,2,128,1,float16,float16,2047,0.2093183994293213
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,2,128,1,float16,fp8,2047,0.14825279712677003
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,2,128,1,float16,float16,4095,0.3848464012145996
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,2,128,1,float16,fp8,4095,0.24870080947875978
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,1,128,1,float16,float16,1,0.04481280148029328
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,1,128,1,float16,fp8,1,0.047707200050354004
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,1,128,1,float16,float16,3,0.04460960030555725
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,1,128,1,float16,fp8,3,0.04769600033760071
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,2,128,1,float16,float16,8191,0.724726390838623
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,4,2,128,1,float16,fp8,8191,0.44573440551757815
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,1,128,1,float16,float16,7,0.04456959962844849
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,1,128,1,float16,fp8,7,0.04795039892196655
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,1,128,1,float16,float16,15,0.044763201475143434
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,1,128,1,float16,fp8,15,0.047998398542404175
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,1,128,1,float16,float16,31,0.04479680061340332
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,1,128,1,float16,fp8,31,0.047732800245285034
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,1,128,1,float16,float16,63,0.04505600035190582
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,1,128,1,float16,float16,127,0.045342400670051575
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,1,128,1,float16,fp8,63,0.04753279983997345
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,1,128,1,float16,fp8,127,0.04778720140457153
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,1,128,1,float16,float16,255,0.0652895987033844
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,1,128,1,float16,fp8,255,0.0593392014503479
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,1,128,1,float16,float16,511,0.08775519728660583
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,1,128,1,float16,fp8,511,0.07765759825706482
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,1,128,1,float16,float16,1023,0.13376959562301635
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,1,128,1,float16,fp8,1023,0.1062432050704956
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,1,128,1,float16,float16,2047,0.21707520484924317
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,1,128,1,float16,fp8,2047,0.15678240060806276
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,2,128,1,float16,float16,1,0.059171199798583984
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,2,128,1,float16,fp8,1,0.06479520201683045
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,2,128,1,float16,fp8,7,0.0647216022014618
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,1,128,1,float16,float16,4095,0.3869839906692505
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,1,128,1,float16,fp8,4095,0.25666720867156984
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,2,128,1,float16,float16,3,0.05893440246582031
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,2,128,1,float16,fp8,3,0.06428800225257873
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,2,128,1,float16,float16,7,0.05864319801330566
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,2,128,1,float16,float16,15,0.05928639769554138
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,2,128,1,float16,fp8,15,0.06479039788246155
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,2,128,1,float16,float16,31,0.058852797746658324
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,2,128,1,float16,fp8,31,0.06486719846725464
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,2,128,1,float16,float16,63,0.06275680065155029
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,2,128,1,float16,fp8,63,0.06499680280685424
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,2,128,1,float16,float16,127,0.06908320188522339
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,2,128,1,float16,fp8,127,0.06748800277709961
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,2,128,1,float16,float16,255,0.09374399781227112
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,2,128,1,float16,fp8,255,0.08795679807662964
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,2,128,1,float16,float16,511,0.13614239692687988
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,2,128,1,float16,fp8,511,0.11822719573974609
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,2,128,1,float16,float16,1023,0.22378239631652833
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,2,128,1,float16,fp8,1023,0.17287039756774902
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,2,128,1,float16,float16,2047,0.3910128116607666
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,2,128,1,float16,fp8,2047,0.2667936086654663
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,1,128,1,float16,float16,1,0.020403200387954713
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,1,128,1,float16,fp8,1,0.021059200167655945
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,1,128,1,float16,float16,3,0.020164799690246583
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,2,128,1,float16,fp8,4095,0.46735358238220215
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,4,2,128,1,float16,float16,4095,0.7381680011749268
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,1,128,1,float16,fp8,3,0.021198399364948273
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,1,128,1,float16,float16,7,0.02027360051870346
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,1,128,1,float16,fp8,7,0.021028800308704375
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,1,128,1,float16,float16,15,0.020241600275039674
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,1,128,1,float16,fp8,15,0.021030400693416596
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,1,128,1,float16,float16,31,0.0203792005777359
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,1,128,1,float16,fp8,31,0.0211776003241539
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,1,128,1,float16,float16,63,0.020425599813461304
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,1,128,1,float16,fp8,63,0.02107519954442978
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,1,128,1,float16,float16,127,0.02038239985704422
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,1,128,1,float16,fp8,127,0.020980800688266753
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,1,128,1,float16,float16,255,0.02173279970884323
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,1,128,1,float16,fp8,255,0.023048000037670137
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,1,128,1,float16,float16,511,0.024748800694942473
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,1,128,1,float16,fp8,511,0.027379199862480164
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,1,128,1,float16,float16,1023,0.02582240104675293
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,1,128,1,float16,fp8,1023,0.026923200488090514
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,1,128,1,float16,float16,2047,0.026105600595474242
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,1,128,1,float16,fp8,2047,0.027166399359703063
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,1,128,1,float16,float16,4095,0.02776159942150116
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,1,128,1,float16,fp8,4095,0.027744001150131224
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,1,128,1,float16,float16,8191,0.03251520097255707
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,1,128,1,float16,fp8,8191,0.033030399680137636
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,1,128,1,float16,float16,16383,0.05423840284347534
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,1,128,1,float16,fp8,16383,0.04456160068511963
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,1,128,1,float16,float16,32767,0.0774191975593567
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,1,128,1,float16,fp8,32767,0.06279680132865906
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,1,128,1,float16,float16,65535,0.12033120393753052
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,1,128,1,float16,fp8,65535,0.09020959734916686
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,2,128,1,float16,float16,7,0.02033119946718216
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,2,128,1,float16,float16,1,0.0203247994184494
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,2,128,1,float16,fp8,1,0.021329599618911742
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,2,128,1,float16,float16,3,0.0203232005238533
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,2,128,1,float16,fp8,3,0.02091519981622696
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,2,128,1,float16,fp8,7,0.021367999911308288
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,2,128,1,float16,float16,15,0.020550400018692017
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,2,128,1,float16,fp8,15,0.021171200275421142
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,2,128,1,float16,float16,31,0.02032800018787384
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,2,128,1,float16,fp8,31,0.021279999613761903
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,2,128,1,float16,float16,63,0.020478400588035583
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,2,128,1,float16,fp8,63,0.02093600034713745
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,2,128,1,float16,float16,127,0.020289599895477295
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,2,128,1,float16,fp8,127,0.021328000724315642
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,2,128,1,float16,float16,255,0.021985599398612977
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,2,128,1,float16,fp8,255,0.023177599906921385
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,2,128,1,float16,float16,511,0.024875199794769286
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,2,128,1,float16,fp8,511,0.02688640058040619
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,2,128,1,float16,float16,1023,0.025889599323272706
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,2,128,1,float16,fp8,1023,0.02709920108318329
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,2,128,1,float16,float16,2047,0.027619200944900512
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,2,128,1,float16,fp8,2047,0.027905601263046264
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,2,128,1,float16,fp8,16383,0.058543998003005984
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,2,128,1,float16,float16,4095,0.030884799361228944
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,2,128,1,float16,fp8,4095,0.03164960145950317
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,2,128,1,float16,float16,8191,0.050483202934265135
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,2,128,1,float16,fp8,8191,0.039483198523521425
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,2,128,1,float16,float16,16383,0.07237600088119507
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,2,128,1,float16,float16,32767,0.11797440052032471
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,2,128,1,float16,fp8,32767,0.08574079871177673
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,2,128,1,float16,float16,65535,0.20390241146087645
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,4,2,128,1,float16,fp8,65535,0.13587679862976074
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,1,128,1,float16,float16,1,0.06629599928855896
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,1,128,1,float16,fp8,1,0.07163680195808411
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,1,128,1,float16,float16,3,0.06610080003738403
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,1,128,1,float16,fp8,3,0.07220799922943115
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,1,128,1,float16,float16,7,0.06621760129928589
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,1,128,1,float16,fp8,7,0.0724847972393036
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,1,128,1,float16,float16,15,0.06600000262260437
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,1,128,1,float16,fp8,15,0.07211999893188477
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,1,128,1,float16,float16,31,0.06640639901161194
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,1,128,1,float16,fp8,31,0.07185119986534119
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,1,128,1,float16,float16,63,0.06899039745330811
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,1,128,1,float16,fp8,63,0.07224000096321107
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,1,128,1,float16,float16,127,0.07755680084228515
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,1,128,1,float16,fp8,127,0.0759440004825592
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,1,128,1,float16,float16,255,0.10034400224685669
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,1,128,1,float16,fp8,255,0.09467999935150147
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,1,128,1,float16,float16,511,0.13989280462265014
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,1,128,1,float16,fp8,511,0.12647039890289308
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,1,128,1,float16,float16,1023,0.22778079509735108
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,1,128,1,float16,fp8,1023,0.1789136052131653
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,2,128,1,float16,float16,1,0.09435999989509583
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,1,128,1,float16,float16,2047,0.39498560428619384
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,2,128,1,float16,fp8,1,0.10586880445480347
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,1,128,1,float16,fp8,2047,0.27655839920043945
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,2,128,1,float16,float16,3,0.09440000057220459
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,2,128,1,float16,fp8,3,0.10627039670944213
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,2,128,1,float16,float16,7,0.09379519820213318
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,2,128,1,float16,fp8,7,0.10602400302886963
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,2,128,1,float16,fp8,63,0.1102895975112915
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,2,128,1,float16,float16,15,0.09420959949493408
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,2,128,1,float16,fp8,15,0.10609439611434937
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,2,128,1,float16,float16,31,0.09470080137252808
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,2,128,1,float16,fp8,31,0.10649919509887695
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,2,128,1,float16,float16,63,0.10231839418411255
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,2,128,1,float16,float16,511,0.23455519676208497
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,2,128,1,float16,float16,127,0.10805599689483643
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,2,128,1,float16,fp8,127,0.11778559684753417
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,2,128,1,float16,float16,255,0.15424959659576415
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,2,128,1,float16,fp8,255,0.14551999568939208
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,2,128,1,float16,fp8,511,0.20823678970336915
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,2,128,1,float16,float16,1023,0.39489600658416746
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,1,128,1,float16,fp8,1,0.11747839450836181
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,2,128,1,float16,fp8,1023,0.3070895910263062
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,1,128,1,float16,float16,1,0.10646400451660157
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,1,128,1,float16,float16,3,0.10561599731445312
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,2,128,1,float16,float16,2047,0.7342720031738281
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,4,2,128,1,float16,fp8,2047,0.49727840423583985
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,1,128,1,float16,fp8,3,0.11787519454956055
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,1,128,1,float16,float16,7,0.1064736008644104
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,1,128,1,float16,fp8,7,0.11742559671401978
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,1,128,1,float16,float16,15,0.10623999834060668
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,1,128,1,float16,fp8,15,0.11770720481872558
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,1,128,1,float16,float16,127,0.12134560346603393
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,1,128,1,float16,float16,31,0.10513919591903687
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,1,128,1,float16,fp8,31,0.11789439916610718
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,1,128,1,float16,float16,63,0.1154096007347107
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,1,128,1,float16,fp8,63,0.12596160173416138
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,1,128,1,float16,fp8,127,0.12984639406204224
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,1,128,1,float16,float16,255,0.15787359476089477
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,1,128,1,float16,fp8,255,0.1573024034500122
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,2,128,1,float16,float16,1,0.16111359596252442
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,1,128,1,float16,float16,511,0.23192479610443115
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,1,128,1,float16,fp8,511,0.2190160036087036
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,1,128,1,float16,float16,1023,0.40244641304016116
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,1,128,1,float16,fp8,1023,0.32139201164245607
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,2,128,1,float16,fp8,1,0.18371039628982544
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,2,128,1,float16,float16,3,0.16223039627075195
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,2,128,1,float16,float16,15,0.16116479635238648
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,2,128,1,float16,fp8,3,0.18309439420700074
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,2,128,1,float16,float16,7,0.15926400423049927
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,2,128,1,float16,fp8,7,0.18271679878234864
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,2,128,1,float16,fp8,15,0.1839743971824646
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,2,128,1,float16,float16,31,0.16736479997634887
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,2,128,1,float16,fp8,31,0.18559839725494384
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,2,128,1,float16,float16,63,0.1723407983779907
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,2,128,1,float16,fp8,63,0.1979599952697754
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,2,128,1,float16,float16,127,0.1787984013557434
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,2,128,1,float16,fp8,127,0.2009023904800415
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,2,128,1,float16,float16,255,0.2510256052017212
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,2,128,1,float16,fp8,255,0.25655839443206785
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,2,128,1,float16,float16,511,0.41924161911010743
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,1,128,1,float16,float16,1,0.020153599977493285
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,1,128,1,float16,fp8,1,0.02102400064468384
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,2,128,1,float16,fp8,511,0.3794447898864746
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,1,128,1,float16,float16,3,0.020334400236606598
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,1,128,1,float16,fp8,3,0.021291199326515197
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,2,128,1,float16,float16,1023,0.7447120189666748
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,1,128,1,float16,fp8,31,0.021320000290870667
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,1,128,1,float16,float16,7,0.02040639966726303
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,1,128,1,float16,fp8,7,0.020815999805927278
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,4,2,128,1,float16,fp8,1023,0.5734687805175781
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,1,128,1,float16,float16,15,0.02028000056743622
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,1,128,1,float16,fp8,15,0.02125599980354309
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,1,128,1,float16,float16,255,0.022064000368118286
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,1,128,1,float16,float16,31,0.020326399803161622
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,1,128,1,float16,fp8,63,0.02139520049095154
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,1,128,1,float16,float16,63,0.020343999564647674
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,1,128,1,float16,float16,127,0.02078399956226349
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,1,128,1,float16,fp8,127,0.020875200629234314
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,1,128,1,float16,fp8,255,0.022987200319766997
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,1,128,1,float16,float16,511,0.02496960014104843
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,1,128,1,float16,fp8,511,0.027211201190948487
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,1,128,1,float16,float16,1023,0.02595840096473694
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,1,128,1,float16,fp8,1023,0.027215999364852906
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,1,128,1,float16,float16,2047,0.027323201298713684
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,1,128,1,float16,float16,16383,0.07607679963111877
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,1,128,1,float16,fp8,2047,0.02789599895477295
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,1,128,1,float16,float16,4095,0.030908799171447753
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,1,128,1,float16,fp8,32767,0.08774880170822144
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,1,128,1,float16,fp8,4095,0.03225600123405457
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,1,128,1,float16,float16,8191,0.051374399662017824
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,1,128,1,float16,fp8,8191,0.042078399658203126
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,1,128,1,float16,fp8,16383,0.062084800004959105
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,1,128,1,float16,float16,32767,0.11986080408096314
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,1,128,1,float16,float16,65535,0.2063647985458374
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,2,128,1,float16,float16,1,0.020470400154590607
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,1,128,1,float16,fp8,65535,0.13831839561462403
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,2,128,1,float16,fp8,1,0.021598400175571443
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,2,128,1,float16,float16,3,0.020691199600696562
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,2,128,1,float16,float16,31,0.020443199574947356
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,2,128,1,float16,fp8,3,0.021814399957656862
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,2,128,1,float16,float16,7,0.020636799931526183
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,2,128,1,float16,fp8,7,0.0215488001704216
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,2,128,1,float16,float16,15,0.02072799950838089
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,2,128,1,float16,fp8,15,0.021193599700927733
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,2,128,1,float16,fp8,31,0.021532799303531646
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,2,128,1,float16,float16,63,0.02067680060863495
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,2,128,1,float16,fp8,63,0.021267199516296388
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,2,128,1,float16,float16,127,0.02091040015220642
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,2,128,1,float16,fp8,127,0.021451200544834136
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,2,128,1,float16,float16,255,0.022247999906539917
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,2,128,1,float16,fp8,255,0.023588800430297853
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,2,128,1,float16,float16,511,0.02524000108242035
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,2,128,1,float16,fp8,511,0.027033600211143493
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,2,128,1,float16,float16,1023,0.027561599016189577
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,2,128,1,float16,fp8,1023,0.02778559923171997
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,2,128,1,float16,float16,2047,0.0321696013212204
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,2,128,1,float16,fp8,2047,0.03152480125427246
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,2,128,1,float16,float16,4095,0.050273597240448
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,2,128,1,float16,fp8,4095,0.04235999882221222
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,2,128,1,float16,float16,8191,0.0736191987991333
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,2,128,1,float16,fp8,8191,0.05895360112190247
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,2,128,1,float16,float16,16383,0.11780639886856079
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,2,128,1,float16,fp8,16383,0.08391680121421814
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,2,128,1,float16,float16,32767,0.20358719825744628
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,2,128,1,float16,fp8,32767,0.13751360177993774
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,1,128,1,float16,float16,1,0.021857599914073943
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,1,128,1,float16,fp8,1,0.022694399952888487
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,2,128,1,float16,float16,65535,0.3786384105682373
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,1,128,1,float16,float16,15,0.021620799601078034
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,4,2,128,1,float16,fp8,65535,0.23762719631195067
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,1,128,1,float16,float16,3,0.02189279943704605
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,1,128,1,float16,fp8,3,0.022614400088787078
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,1,128,1,float16,fp8,31,0.023104000091552734
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,1,128,1,float16,float16,7,0.021648000180721282
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,1,128,1,float16,fp8,7,0.022731199860572815
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,1,128,1,float16,float16,31,0.021615999937057494
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,1,128,1,float16,fp8,15,0.022918400168418885
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,1,128,1,float16,float16,63,0.021825599670410156
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,1,128,1,float16,fp8,63,0.0227743998169899
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,1,128,1,float16,float16,127,0.022259199619293214
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,1,128,1,float16,fp8,127,0.022609600424766542
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,1,128,1,float16,float16,255,0.02373439967632294
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,1,128,1,float16,fp8,255,0.02452320009469986
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,1,128,1,float16,float16,511,0.026576000452041625
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,1,128,1,float16,fp8,511,0.02864319980144501
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,1,128,1,float16,float16,1023,0.03300319910049439
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,1,128,1,float16,fp8,1023,0.033211201429367065
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,1,128,1,float16,float16,2047,0.05156480073928833
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,1,128,1,float16,fp8,2047,0.04261760115623474
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,1,128,1,float16,float16,4095,0.07354400157928467
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,1,128,1,float16,fp8,4095,0.05921919941902161
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,1,128,1,float16,float16,8191,0.11944799423217774
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,1,128,1,float16,fp8,8191,0.08622559905052185
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,1,128,1,float16,float16,16383,0.2093264102935791
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,1,128,1,float16,fp8,16383,0.1420367956161499
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,1,128,1,float16,float16,32767,0.3838511943817139
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,1,128,1,float16,float16,1,0.01752000004053116
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,1,128,1,float16,fp8,32767,0.24049599170684816
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,1,128,1,float16,fp8,1,0.018456000089645385
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,1,128,1,float16,float16,3,0.017441600561141968
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,1,128,1,float16,float16,7,0.017166399955749513
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,1,128,1,float16,fp8,3,0.018428799510002137
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,1,128,1,float16,fp8,65535,0.4370704174041748
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,1,128,1,float16,float16,65535,0.7305776119232178
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,1,128,1,float16,fp8,7,0.018481600284576415
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,1,128,1,float16,float16,15,0.01746080070734024
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,1,128,1,float16,fp8,15,0.018222400546073915
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,1,128,1,float16,float16,31,0.017534400522708892
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,1,128,1,float16,fp8,131071,0.8400527954101562
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,1,128,1,float16,fp8,31,0.018187199532985688
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,1,128,1,float16,float16,63,0.017422400414943695
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,64,1,2,1,128,1,float16,float16,131071,1.427467155456543
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,1,128,1,float16,fp8,63,0.01855359971523285
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,1,128,1,float16,float16,127,0.017423999309539796
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,1,128,1,float16,fp8,127,0.018236799538135527
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,1,128,1,float16,float16,255,0.019120000302791595
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,1,128,1,float16,fp8,255,0.020158399641513825
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,1,128,1,float16,float16,511,0.021726399660110474
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,1,128,1,float16,fp8,511,0.024195200204849242
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,1,128,1,float16,float16,1023,0.022524799406528472
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,1,128,1,float16,fp8,1023,0.024139200150966645
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,1,128,1,float16,float16,2047,0.02299039959907532
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,1,128,1,float16,fp8,2047,0.023975999653339387
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,1,128,1,float16,float16,4095,0.023086400330066682
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,1,128,1,float16,fp8,4095,0.024404799938201903
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,1,128,1,float16,float16,8191,0.024907200038433074
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,1,128,1,float16,fp8,8191,0.026339200139045716
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,1,128,1,float16,float16,16383,0.027884799242019653
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,1,128,1,float16,fp8,16383,0.029840001463890077
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,1,128,1,float16,float16,32767,0.030078399181365966
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,1,128,1,float16,fp8,32767,0.030396801233291627
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,1,128,1,float16,float16,65535,0.03437120020389557
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,1,128,1,float16,fp8,65535,0.03457440137863159
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,1,128,1,float16,float16,131071,0.05351520180702209
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1,1,2,1,128,1,float16,fp8,131071,0.04522719979286194
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,1,128,1,float16,float16,1,0.018054400384426118
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,1,128,1,float16,fp8,1,0.019089600443840025
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,1,128,1,float16,float16,3,0.01818079948425293
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,1,128,1,float16,fp8,3,0.018987199664115904
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,1,128,1,float16,float16,7,0.01835999935865402
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,1,128,1,float16,fp8,7,0.018963199853897095
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,1,128,1,float16,float16,15,0.018036800622940063
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,1,128,1,float16,fp8,15,0.01930239945650101
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,1,128,1,float16,float16,31,0.018542400002479552
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,1,128,1,float16,fp8,31,0.01916159987449646
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,1,128,1,float16,float16,63,0.018353599309921264
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,1,128,1,float16,fp8,63,0.01897599995136261
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,1,128,1,float16,float16,127,0.018401600420475006
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,1,128,1,float16,fp8,127,0.019171200692653656
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,1,128,1,float16,float16,255,0.019801600277423857
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,1,128,1,float16,fp8,255,0.021188800036907197
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,1,128,1,float16,float16,511,0.023028799891471864
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,1,128,1,float16,fp8,511,0.025110399723052977
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,1,128,1,float16,float16,1023,0.023758399486541747
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,1,128,1,float16,fp8,1023,0.02503040134906769
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,1,128,1,float16,float16,2047,0.024201600253582
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,1,128,1,float16,fp8,2047,0.025305598974227905
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,1,128,1,float16,float16,4095,0.023958399891853333
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,1,128,1,float16,fp8,4095,0.02555840015411377
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,1,128,1,float16,float16,8191,0.025659200549125672
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,1,128,1,float16,fp8,8191,0.02746880054473877
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,1,128,1,float16,float16,16383,0.030472001433372496
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,1,128,1,float16,fp8,16383,0.031020799279212953
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,1,128,1,float16,float16,32767,0.03444800078868866
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,1,128,1,float16,fp8,32767,0.034862399101257324
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,1,128,1,float16,float16,65535,0.051678401231765744
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,1,128,1,float16,fp8,65535,0.04089120030403137
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,1,128,1,float16,float16,131071,0.07497919797897339
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2,1,2,1,128,1,float16,fp8,131071,0.06095359921455383
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,1,128,1,float16,float16,1,0.02120479941368103
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,1,128,1,float16,fp8,1,0.02210880070924759
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,1,128,1,float16,float16,3,0.021055999398231506
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,1,128,1,float16,fp8,3,0.02175839990377426
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,1,128,1,float16,float16,7,0.020828799903392793
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,1,128,1,float16,fp8,7,0.0219200000166893
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,1,128,1,float16,float16,15,0.02084160000085831
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,1,128,1,float16,fp8,15,0.02191520035266876
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,1,128,1,float16,float16,31,0.02106720060110092
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,1,128,1,float16,fp8,31,0.0220223993062973
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,1,128,1,float16,float16,63,0.02099359929561615
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,1,128,1,float16,fp8,63,0.021799999475479125
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,1,128,1,float16,float16,127,0.021166400611400606
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,1,128,1,float16,fp8,127,0.021878400444984437
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,1,128,1,float16,float16,255,0.02290080040693283
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,1,128,1,float16,fp8,255,0.02396800071001053
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,1,128,1,float16,float16,511,0.02598400115966797
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,1,128,1,float16,fp8,511,0.027382400631904603
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,1,128,1,float16,float16,1023,0.02826080024242401
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,1,128,1,float16,fp8,1023,0.028764799237251282
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,1,128,1,float16,float16,2047,0.030276799201965333
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,1,128,1,float16,fp8,2047,0.03232640027999878
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,1,128,1,float16,float16,4095,0.05114719867706299
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,1,128,1,float16,fp8,4095,0.038780799508094786
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,1,128,1,float16,float16,8191,0.07437120079994201
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,1,128,1,float16,fp8,8191,0.059836798906326295
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,1,128,1,float16,float16,16383,0.12207520008087158
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,1,128,1,float16,fp8,16383,0.08825439810752869
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,1,128,1,float16,float16,32767,0.20908958911895753
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,1,128,1,float16,fp8,32767,0.14139519929885863
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,1,128,1,float16,float16,65535,0.38436160087585447
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,1,128,1,float16,fp8,65535,0.23993120193481446
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,1,128,1,float16,float16,1,0.020156799256801604
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,1,128,1,float16,fp8,1,0.020849600434303284
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,1,128,1,float16,float16,3,0.020172800123691558
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,1,128,1,float16,fp8,3,0.02077440023422241
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,1,128,1,float16,float16,131071,0.7294735908508301
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,32,1,2,1,128,1,float16,fp8,131071,0.4397280216217041
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,1,128,1,float16,float16,7,0.020046399533748628
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,1,128,1,float16,fp8,7,0.020844799280166627
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,1,128,1,float16,float16,15,0.020239999890327452
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,1,128,1,float16,fp8,15,0.02086080014705658
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,1,128,1,float16,float16,31,0.02022400051355362
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,1,128,1,float16,fp8,31,0.020747199654579163
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,1,128,1,float16,float16,63,0.0202224001288414
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,1,128,1,float16,fp8,63,0.021036800742149354
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,1,128,1,float16,float16,127,0.020068800449371337
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,1,128,1,float16,fp8,127,0.02099519968032837
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,1,128,1,float16,float16,255,0.021649600565433504
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,1,128,1,float16,fp8,255,0.022886399924755097
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,1,128,1,float16,float16,511,0.02481600046157837
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,1,128,1,float16,fp8,511,0.026638400554656983
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,1,128,1,float16,float16,1023,0.025567999482154845
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,1,128,1,float16,fp8,1023,0.026846399903297423
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,1,128,1,float16,float16,16383,0.03593119978904724
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,1,128,1,float16,float16,2047,0.02571679949760437
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,1,128,1,float16,fp8,16383,0.03633280098438263
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,1,128,1,float16,fp8,2047,0.027143999934196472
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,1,128,1,float16,float16,4095,0.025966399908065797
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,1,128,1,float16,fp8,4095,0.02707360088825226
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,1,128,1,float16,float16,8191,0.029411199688911437
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,1,128,1,float16,fp8,8191,0.02935839891433716
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,1,128,1,float16,float16,32767,0.05384960174560547
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,1,128,1,float16,fp8,32767,0.04518879950046539
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,1,128,1,float16,float16,65535,0.07751200199127198
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,1,128,1,float16,fp8,65535,0.06282560229301452
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,1,128,1,float16,float16,131071,0.12247999906539916
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,4,1,2,1,128,1,float16,fp8,131071,0.09122400283813477
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,1,128,1,float16,fp8,7,0.024556800723075867
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,1,128,1,float16,float16,1,0.023630400002002717
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,1,128,1,float16,fp8,1,0.024512000381946564
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,1,128,1,float16,float16,3,0.023681600391864777
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,1,128,1,float16,fp8,3,0.024409599602222443
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,1,128,1,float16,float16,7,0.02375359982252121
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,1,128,1,float16,float16,15,0.024128000438213348
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,1,128,1,float16,fp8,15,0.024700799584388734
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,1,128,1,float16,float16,31,0.023588800430297853
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,1,128,1,float16,fp8,31,0.024592000246047973
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,1,128,1,float16,float16,63,0.02343360036611557
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,1,128,1,float16,fp8,63,0.024558399617671967
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,1,128,1,float16,float16,127,0.024208000302314757
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,1,128,1,float16,float16,1023,0.050625598430633544
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,1,128,1,float16,fp8,127,0.024851199984550477
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,1,128,1,float16,float16,255,0.025651198625564576
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,1,128,1,float16,fp8,255,0.02671839892864227
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,1,128,1,float16,float16,511,0.03190400004386902
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,1,128,1,float16,fp8,511,0.030697599053382874
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,1,128,1,float16,fp8,1023,0.04006560146808624
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,1,128,1,float16,float16,2047,0.07267199754714966
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,1,128,1,float16,fp8,2047,0.058385598659515384
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,1,128,1,float16,float16,4095,0.11668000221252442
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,1,128,1,float16,fp8,4095,0.08320479989051818
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,1,128,1,float16,float16,8191,0.20184481143951416
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,1,128,1,float16,fp8,8191,0.13612960577011107
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,1,128,1,float16,float16,16383,0.37857439517974856
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,1,128,1,float16,fp8,16383,0.23726561069488525
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,1,128,1,float16,float16,1,0.030417600274086
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,1,128,1,float16,fp8,32767,0.4343711853027344
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,1,128,1,float16,float16,32767,0.7226272106170655
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,1,128,1,float16,fp8,1,0.03204799890518188
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,1,128,1,float16,fp8,3,0.03210079967975617
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,1,128,1,float16,float16,3,0.030380800366401672
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,1,128,1,float16,fp8,15,0.03221119940280914
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,1,128,1,float16,float16,15,0.03055039942264557
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,1,128,1,float16,float16,31,0.030561599135398864
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,1,128,1,float16,float16,7,0.030462399125099182
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,1,128,1,float16,fp8,7,0.032216000556945804
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,1,128,1,float16,fp8,31,0.03208479881286621
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,1,128,1,float16,float16,65535,1.4225135803222657
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,1,128,1,float16,float16,63,0.030436798930168152
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,1,128,1,float16,fp8,63,0.03196159899234772
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,1,128,1,float16,float16,127,0.030375999212265015
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,128,1,2,1,128,1,float16,fp8,65535,0.8265439987182617
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,1,128,1,float16,fp8,127,0.032174399495124816
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,1,128,1,float16,float16,255,0.037432000041007996
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,1,128,1,float16,fp8,255,0.035815998911857605
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,1,128,1,float16,float16,511,0.05389119982719422
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,1,128,1,float16,fp8,2047,0.09067040085792541
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,1,128,1,float16,fp8,511,0.04711039960384369
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,1,128,1,float16,float16,1023,0.07953919768333435
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,1,128,1,float16,fp8,1023,0.06880159974098206
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,1,128,1,float16,float16,2047,0.12140640020370483
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,1,128,1,float16,float16,4095,0.2069920063018799
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,1,128,1,float16,fp8,4095,0.1443503975868225
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,1,128,1,float16,float16,8191,0.38314080238342285
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,1,128,1,float16,fp8,8191,0.24413440227508545
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,1,128,1,float16,float16,1,0.04470239877700806
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,1,128,1,float16,fp8,16383,0.4361904144287109
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,1,128,1,float16,fp8,1,0.04747999906539917
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,1,128,1,float16,float16,3,0.04442239999771118
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,1,128,1,float16,fp8,3,0.048187199234962466
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,256,1,2,1,128,1,float16,float16,16383,0.7267888069152832
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,1,128,1,float16,float16,7,0.04448480010032654
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,1,128,1,float16,fp8,7,0.04784800112247467
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,1,128,1,float16,float16,15,0.04454559981822968
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,1,128,1,float16,float16,31,0.044470399618148804
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,1,128,1,float16,fp8,15,0.048161599040031436
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,1,128,1,float16,fp8,31,0.04729120135307312
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,1,128,1,float16,float16,63,0.04471679925918579
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,1,128,1,float16,fp8,63,0.0478879988193512
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,1,128,1,float16,float16,127,0.04599680006504059
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,1,128,1,float16,fp8,127,0.047886401414871216
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,1,128,1,float16,float16,255,0.06559360027313232
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,1,128,1,float16,fp8,255,0.058676797151565555
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,1,128,1,float16,float16,511,0.0878928005695343
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,1,128,1,float16,fp8,2047,0.1561519980430603
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,1,128,1,float16,fp8,511,0.07692959904670715
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,1,128,1,float16,float16,1023,0.1349936008453369
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,1,128,1,float16,float16,4095,0.3867680072784424
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,1,128,1,float16,fp8,1023,0.1057055950164795
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,1,128,1,float16,float16,2047,0.2171072006225586
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,1,128,1,float16,float16,3,0.020340800285339355
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,1,128,1,float16,fp8,3,0.021132799983024596
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,1,128,1,float16,float16,1,0.02003999948501587
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,1,128,1,float16,fp8,4095,0.25564959049224856
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,1,128,1,float16,fp8,1,0.0211776003241539
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,1,128,1,float16,fp8,8191,0.4559199810028076
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,512,1,2,1,128,1,float16,float16,8191,0.7304384231567382
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,1,128,1,float16,fp8,7,0.020931200683116914
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,1,128,1,float16,float16,7,0.020313599705696107
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,1,128,1,float16,float16,15,0.02019840031862259
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,1,128,1,float16,fp8,15,0.020691199600696562
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,1,128,1,float16,float16,31,0.02028000056743622
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,1,128,1,float16,fp8,31,0.021110400557518005
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,1,128,1,float16,float16,63,0.02022559940814972
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,1,128,1,float16,fp8,63,0.021382400393486024
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,1,128,1,float16,float16,127,0.020099200308322906
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,1,128,1,float16,fp8,127,0.020942400395870208
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,1,128,1,float16,float16,255,0.02181279957294464
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,1,128,1,float16,fp8,255,0.022944000363349915
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,1,128,1,float16,float16,511,0.024803200364112855
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,1,128,1,float16,fp8,511,0.02685439884662628
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,1,128,1,float16,float16,1023,0.025523200631141663
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,1,128,1,float16,fp8,1023,0.027315199375152588
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,1,128,1,float16,float16,2047,0.026124799251556398
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,1,128,1,float16,fp8,2047,0.02726239860057831
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,1,128,1,float16,float16,4095,0.02789919972419739
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,1,128,1,float16,fp8,4095,0.02797600030899048
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,1,128,1,float16,float16,8191,0.03342719972133636
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,1,128,1,float16,fp8,8191,0.033641600608825685
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,1,128,1,float16,float16,16383,0.05443199872970581
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,1,128,1,float16,fp8,16383,0.043131199479103086
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,1,128,1,float16,float16,32767,0.07717120051383972
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,1,128,1,float16,fp8,32767,0.06351040005683899
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,1,128,1,float16,float16,65535,0.12202880382537842
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,1,128,1,float16,fp8,65535,0.09115520119667053
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,1,128,1,float16,float16,131071,0.21193439960479737
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,1,128,1,float16,fp8,3,0.0719871997833252
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,1,128,1,float16,float16,7,0.06587679982185364
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,8,1,2,1,128,1,float16,fp8,131071,0.14586559534072877
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,1,128,1,float16,float16,1,0.06611679792404175
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,1,128,1,float16,fp8,1,0.07218080163002014
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,1,128,1,float16,float16,3,0.06618559956550599
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,1,128,1,float16,float16,15,0.06621440052986145
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,1,128,1,float16,fp8,7,0.07173759937286377
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,1,128,1,float16,fp8,15,0.07183359861373902
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,1,128,1,float16,float16,31,0.06633599996566772
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,1,128,1,float16,fp8,31,0.07186239957809448
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,1,128,1,float16,float16,63,0.07008000016212464
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,1,128,1,float16,fp8,63,0.07250239849090576
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,1,128,1,float16,float16,127,0.07667359709739685
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,1,128,1,float16,fp8,127,0.0745360016822815
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,1,128,1,float16,float16,255,0.0996399998664856
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,1,128,1,float16,fp8,255,0.0945855975151062
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,1,128,1,float16,float16,511,0.138644802570343
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,1,128,1,float16,fp8,511,0.12673439979553222
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,1,128,1,float16,float16,1023,0.22651200294494628
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,1,128,1,float16,fp8,1023,0.17824000120162964
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,1,128,1,float16,float16,2047,0.39298720359802247
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,1,128,1,float16,fp8,2047,0.2734960079193115
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,1,128,1,float16,float16,1,0.10610079765319824
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,1,128,1,float16,fp8,1,0.11736160516738892
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,1,128,1,float16,float16,3,0.10607999563217163
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,1,128,1,float16,float16,4095,0.7349055767059326
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,1024,1,2,1,128,1,float16,fp8,4095,0.47393598556518557
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,1,128,1,float16,fp8,3,0.11780639886856079
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,1,128,1,float16,float16,7,0.10524959564208984
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,1,128,1,float16,fp8,7,0.11779839992523193
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,1,128,1,float16,float16,15,0.10588159561157226
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,1,128,1,float16,fp8,15,0.11760319471359253
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,1,128,1,float16,float16,31,0.10613919496536255
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,1,128,1,float16,fp8,31,0.11766879558563233
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,1,128,1,float16,float16,63,0.11529760360717774
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,1,128,1,float16,fp8,63,0.12115039825439453
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,1,128,1,float16,float16,127,0.12089600563049316
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,1,128,1,float16,fp8,127,0.12940479516983033
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,1,128,1,float16,float16,255,0.15788160562515258
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,1,128,1,float16,fp8,255,0.15700000524520874
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,1,128,1,float16,float16,511,0.23254239559173584
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,1,128,1,float16,fp8,1023,0.32285120487213137
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,1,128,1,float16,fp8,511,0.22031838893890382
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,1,128,1,float16,float16,1023,0.3994271993637085
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,1,128,1,float16,float16,1,0.020380799472332
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,1,128,1,float16,fp8,1,0.02107519954442978
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,1,128,1,float16,float16,3,0.02019519954919815
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,1,128,1,float16,fp8,3,0.021433599293231964
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,1,128,1,float16,float16,7,0.02029920071363449
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,1,128,1,float16,float16,2047,0.7395679950714111
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,1,128,1,float16,fp8,7,0.020929600298404693
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,2048,1,2,1,128,1,float16,fp8,2047,0.5114016056060791
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,1,128,1,float16,float16,15,0.020502400398254395
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,1,128,1,float16,fp8,15,0.020980800688266753
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,1,128,1,float16,float16,31,0.020175999402999877
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,1,128,1,float16,fp8,31,0.021396799385547637
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,1,128,1,float16,float16,63,0.020531199872493744
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,1,128,1,float16,fp8,63,0.02117920070886612
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,1,128,1,float16,float16,127,0.020080000162124634
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,1,128,1,float16,float16,255,0.022441600263118745
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,1,128,1,float16,fp8,127,0.02109760046005249
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,1,128,1,float16,fp8,255,0.022920000553131103
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,1,128,1,float16,float16,511,0.025017601251602174
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,1,128,1,float16,fp8,511,0.02736319899559021
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,1,128,1,float16,float16,1023,0.02611519992351532
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,1,128,1,float16,fp8,1023,0.027377599477767946
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,1,128,1,float16,float16,2047,0.027588799595832825
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,1,128,1,float16,fp8,2047,0.028011199831962586
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,1,128,1,float16,float16,4095,0.031569600105285645
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,1,128,1,float16,fp8,4095,0.03142879903316498
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,1,128,1,float16,float16,8191,0.05165119767189026
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,1,128,1,float16,fp8,8191,0.03930079936981201
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,1,128,1,float16,float16,16383,0.07708320021629333
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,1,128,1,float16,fp8,16383,0.06262080073356628
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,1,128,1,float16,float16,32767,0.120961594581604
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,1,128,1,float16,fp8,32767,0.08905439972877502
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,1,128,1,float16,float16,65535,0.2080672025680542
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,1,128,1,float16,fp8,65535,0.14159679412841797
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,1,128,1,float16,float16,131071,0.38368639945983884
SGLang,0.5.6.post2,NVIDIA H100 80GB HBM3,generation_attention,flash_attention,16,1,2,1,128,1,float16,fp8,131071,0.2397088050842285
