framework,version,device,op_name,kernel_source,mla_dtype,kv_cache_dtype,num_heads,batch_size,isl,tp_size,step,latency
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,1,1,1,0,0.10463039875030518
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,1,4,1,0,0.10432319641113282
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,1,8,1,0,0.10404160022735595
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,1,16,1,0,0.10443199872970581
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,1,32,1,0,0.11270719766616821
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,1,64,1,0,0.11065599918365479
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,1,128,1,0,0.10903680324554443
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,1,256,1,0,0.10845119953155517
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,1,512,1,0,0.10890239477157593
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,1,1024,1,0,0.10881279706954956
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,1,2048,1,0,0.11091840267181396
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,1,4096,1,0,0.11413439512252807
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,1,8192,1,0,0.1181056022644043
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,1,16384,1,0,0.12686400413513182
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,2,1,1,0,0.09847360253334045
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,2,4,1,0,0.09828479886054993
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,2,8,1,0,0.09850879907608032
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,2,16,1,0,0.09880639910697937
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,2,32,1,0,0.1086400032043457
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,2,64,1,0,0.10525120496749878
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,2,128,1,0,0.10390080213546753
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,2,256,1,0,0.10379519462585449
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,2,512,1,0,0.10333759784698486
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,2,1024,1,0,0.10528000593185424
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,2,2048,1,0,0.10812159776687622
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,2,4096,1,0,0.10916800498962402
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,2,8192,1,0,0.11420799493789673
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,2,16384,1,0,0.12124480009078979
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,4,1,1,0,0.1004863977432251
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,4,4,1,0,0.10061119794845581
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,4,8,1,0,0.09983360171318054
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,4,16,1,0,0.10446720123291016
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,4,32,1,0,0.10973440408706665
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,4,64,1,0,0.10765119791030883
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,4,128,1,0,0.10688320398330689
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,4,256,1,0,0.10650559663772582
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,4,512,1,0,0.10811519622802734
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,4,1024,1,0,0.11055359840393067
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,4,2048,1,0,0.11155200004577637
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,4,4096,1,0,0.11813119649887086
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,4,8192,1,0,0.12497600317001342
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,4,16384,1,0,0.13908480405807494
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,8,1,1,0,0.0959231972694397
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,8,4,1,0,0.09594879746437072
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,8,8,1,0,0.09633920192718506
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,8,16,1,0,0.09675840139389039
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,8,32,1,0,0.10666559934616089
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,8,64,1,0,0.10420479774475097
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,8,128,1,0,0.10468800067901611
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,8,256,1,0,0.10616960525512695
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,8,512,1,0,0.10717120170593261
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,8,1024,1,0,0.10958399772644042
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,8,2048,1,0,0.11695679426193237
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,8,4096,1,0,0.12425919771194457
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,8,8192,1,0,0.138755202293396
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,8,16384,1,0,0.16624640226364135
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,1,1,1,0,0.0687936007976532
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,16,1,1,0,0.09888319969177246
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,1,4,1,0,0.0692575991153717
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,16,4,1,0,0.09822400212287903
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,16,8,1,0,0.09866880178451538
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,1,8,1,0,0.06884480118751526
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,16,16,1,0,0.09855039715766907
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,1,16,1,0,0.06906880140304565
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,1,32,1,0,0.0776095986366272
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,16,32,1,0,0.10964800119400024
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,1,64,1,0,0.0750656008720398
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,16,64,1,0,0.1102944016456604
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,1,128,1,0,0.07479680180549622
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,16,128,1,0,0.11233600378036498
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,1,256,1,0,0.07399680018424988
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,16,256,1,0,0.11405760049819946
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,1,512,1,0,0.07402880191802978
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,16,512,1,0,0.11399040222167969
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,1,1024,1,0,0.07838400006294251
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,16,1024,1,0,0.11835520267486573
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,1,2048,1,0,0.07809280157089234
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,16,2048,1,0,0.12654399871826172
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,1,4096,1,0,0.08022720217704774
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,16,4096,1,0,0.1412608027458191
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,1,8192,1,0,0.08395199775695801
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,1,16384,1,0,0.09075199961662292
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,16,8192,1,0,0.1688256025314331
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,16,16384,1,0,0.22689919471740722
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,32,1,1,0,0.0930624008178711
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,32,4,1,0,0.09327359795570374
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,2,1,1,0,0.0679744005203247
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,32,8,1,0,0.0933023989200592
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,2,4,1,0,0.0680288016796112
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,32,16,1,0,0.09427520036697387
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,2,8,1,0,0.06815680265426635
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,32,32,1,0,0.095004802942276
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,2,16,1,0,0.0685696005821228
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,32,64,1,0,0.09665600061416627
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,2,32,1,0,0.07742080092430115
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,32,128,1,0,0.09711679816246033
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,2,64,1,0,0.07527999877929688
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,2,128,1,0,0.07359039783477783
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,32,256,1,0,0.11160000562667846
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,2,256,1,0,0.07334399819374085
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,32,512,1,0,0.12097280025482178
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,2,512,1,0,0.07312639951705932
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,32,1024,1,0,0.1284448027610779
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,2,1024,1,0,0.07310400009155274
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,2,2048,1,0,0.07518079876899719
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,32,2048,1,0,0.14335039854049683
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,2,4096,1,0,0.0782144010066986
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,32,4096,1,0,0.1730272054672241
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,2,8192,1,0,0.08290560245513916
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,64,1,1,0,0.09546239972114563
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,2,16384,1,0,0.09395840167999267
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,64,4,1,0,0.09562879800796509
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,4,1,1,0,0.06880639791488648
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,64,8,1,0,0.09553599953651429
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,4,4,1,0,0.06871039867401123
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,4,8,1,0,0.06912320256233215
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,64,16,1,0,0.09546239972114563
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,4,16,1,0,0.06893119812011719
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,64,32,1,0,0.09508799910545349
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,4,32,1,0,0.07927680015563965
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,4,64,1,0,0.07816640138626099
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,64,64,1,0,0.09702720046043396
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,4,128,1,0,0.07452160120010376
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,64,128,1,0,0.09857280254364013
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,4,256,1,0,0.07409279942512512
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,64,256,1,0,0.1278272032737732
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,4,512,1,0,0.07455040216445923
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,4,1024,1,0,0.0769312024116516
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,64,512,1,0,0.14233920574188233
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,4,2048,1,0,0.07945280075073242
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,64,1024,1,0,0.16070719957351684
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,4,4096,1,0,0.08014079928398132
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,4,8192,1,0,0.08697919845581055
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,64,2048,1,0,0.18916480541229247
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,4,16384,1,0,0.0987936019897461
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,64,4096,1,0,0.24623360633850097
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,8,1,1,0,0.06710079908370972
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,8,4,1,0,0.06674879789352417
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,128,1,1,0,0.11328639984130859
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,1,1,1,0,0.10092799663543701
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,8,8,1,0,0.06752960085868835
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,128,4,1,0,0.11288319826126099
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,1,4,1,0,0.10008000135421753
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,8,16,1,0,0.06750400066375732
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,128,8,1,0,0.11343040466308593
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,1,8,1,0,0.10032000541687011
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,8,32,1,0,0.07767999768257142
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,128,16,1,0,0.11407359838485717
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,1,16,1,0,0.10140160322189332
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,8,64,1,0,0.07486079931259156
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,128,32,1,0,0.11417920589447021
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,1,32,1,0,0.10074559450149537
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,8,128,1,0,0.07351040244102477
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,128,64,1,0,0.11778240203857422
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,1,64,1,0,0.10218559503555298
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,8,256,1,0,0.07434239983558655
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,128,128,1,0,0.12026560306549072
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,1,128,1,0,0.10431679487228393
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,8,512,1,0,0.07568640112876893
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,128,256,1,0,0.129475200176239
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,1,256,1,0,0.10758719444274903
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,1,1,1,0,0.05469120144844055
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,8,1024,1,0,0.07868480086326599
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,1,512,1,0,0.11017600297927857
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,128,512,1,0,0.14569599628448487
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,1,4,1,0,0.05559679865837097
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,8,2048,1,0,0.07934079766273498
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,1,1024,1,0,0.11099519729614257
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,1,8,1,0,0.055471998453140256
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,1,1,1,0,0.05232639908790589
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,8,4096,1,0,0.08787519931793213
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,1,2048,1,0,0.11999679803848266
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,128,1024,1,0,0.17485760450363158
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,1,16,1,0,0.05520319938659668
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,8,8192,1,0,0.09775360226631165
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,1,4,1,0,0.05198079943656921
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,1,4096,1,0,0.11346559524536133
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,1,32,1,0,0.05486720204353333
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,1,8,1,0,0.05221760272979736
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,8,16384,1,0,0.11544320583343506
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,128,2048,1,0,0.23273921012878418
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,1,64,1,0,0.05661439895629883
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,1,8192,1,0,0.1251871943473816
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,1,16,1,0,0.05225279927253723
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,16,1,1,0,0.06660159826278686
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,1,128,1,0,0.05694079995155334
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,1,1,1,0,0.04213440120220184
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,256,1,1,0,0.16335999965667725
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,1,16384,1,0,0.12178560495376586
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,1,32,1,0,0.05722879767417908
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,16,4,1,0,0.06604160070419311
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,1,256,1,0,0.060924798250198364
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,1,1,1,0,0.03841600120067597
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,1,4,1,0,0.04207360148429871
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,1,1,1,0,0.07184320092201232
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,256,4,1,0,0.16424319744110108
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,1,64,1,0,0.05647680163383484
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,16,8,1,0,0.06619200110435486
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,1,512,1,0,0.06415680050849915
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,1,4,1,0,0.03856000006198883
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,1,8,1,0,0.042371198534965515
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,1,4,1,0,0.07178239822387696
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,1,128,1,0,0.055580800771713255
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,256,8,1,0,0.1642016053199768
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,16,16,1,0,0.06642879843711853
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,1,1024,1,0,0.06542400121688843
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,1,8,1,0,0.03857280015945434
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,1,16,1,0,0.042559999227523806
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,2,1,1,0,0.103603196144104
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,1,8,1,0,0.07124480009078979
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,1,256,1,0,0.05668479800224304
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,16,32,1,0,0.07707200050354004
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,256,16,1,0,0.16453759670257567
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,1,16,1,0,0.07275840044021606
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,1,2048,1,0,0.06669120192527771
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,1,16,1,0,0.03869760036468506
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,1,32,1,0,0.04223679900169373
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,2,4,1,0,0.10352319478988647
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,1,512,1,0,0.058422398567199704
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,16,64,1,0,0.07417600154876709
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,1,32,1,0,0.07216960191726685
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,2,8,1,0,0.10432319641113282
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,256,32,1,0,0.16651840209960939
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,1,4096,1,0,0.06871680021286011
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,1,32,1,0,0.04074560105800629
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,1,64,1,0,0.04394879937171936
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,1,1024,1,0,0.06321280002593994
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,16,128,1,0,0.07507200241088867
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,1,64,1,0,0.07360960245132446
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,1,8192,1,0,0.07208960056304932
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,2,16,1,0,0.10419199466705323
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,1,64,1,0,0.04047679901123047
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,1,128,1,0,0.04614399969577789
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,1,128,1,0,0.07601600289344787
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,1,2048,1,0,0.06352639794349671
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,1,16384,1,0,0.07529600262641907
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,16,256,1,0,0.0762336015701294
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,256,64,1,0,0.17236800193786622
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,2,32,1,0,0.10439679622650147
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,1,256,1,0,0.07955200076103211
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,1,128,1,0,0.04029119908809662
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,1,256,1,0,0.045731198787689206
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,1,4096,1,0,0.0655456006526947
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,2,64,1,0,0.1044927954673767
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,16,512,1,0,0.07784000039100647
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,1,512,1,0,0.0809984028339386
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,1,256,1,0,0.0411327987909317
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,256,128,1,0,0.17972480058670043
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,1,512,1,0,0.04761280119419098
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,1,8192,1,0,0.06980479955673217
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,2,128,1,0,0.10731199979782105
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,1,1024,1,0,0.0810368001461029
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,16,1024,1,0,0.07915840148925782
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,1,512,1,0,0.04297919869422913
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,2,1,1,0,0.05448960065841675
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,2,256,1,0,0.11056640148162841
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,1,16384,1,0,0.07740479707717896
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,1,1024,1,0,0.04811519980430603
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,1,2048,1,0,0.08241919875144958
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,16,2048,1,0,0.08754559755325317
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,256,256,1,0,0.19470399618148804
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,2,512,1,0,0.11299519538879395
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,1,1024,1,0,0.046614399552345274
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,2,4,1,0,0.054655998945236206
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,1,2048,1,0,0.048732799291610715
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,1,4096,1,0,0.08467519879341126
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,2,8,1,0,0.05551360249519348
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,1,2048,1,0,0.04650880098342895
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,2,1024,1,0,0.11294080018997192
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,16,4096,1,0,0.09680960178375245
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,2,16,1,0,0.05509759783744812
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,1,4096,1,0,0.05032640099525452
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,2,1,1,0,0.05255680084228516
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,1,4096,1,0,0.048393601179122926
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,1,8192,1,0,0.08694720268249512
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,2,2048,1,0,0.11547199487686158
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,256,512,1,0,0.22485120296478273
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,1,8192,1,0,0.053401601314544675
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,2,32,1,0,0.05524160265922547
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,2,4,1,0,0.0523904025554657
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,1,8192,1,0,0.054073601961135864
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,1,16384,1,0,0.09138879776000977
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,16,8192,1,0,0.11484800577163697
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,2,4096,1,0,0.11747519969940186
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,1,16384,1,0,0.06349440217018128
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,2,64,1,0,0.05575680136680603
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,2,8,1,0,0.05252159833908081
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,1,16384,1,0,0.059545600414276124
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,256,1024,1,0,0.2855871915817261
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,2,8192,1,0,0.12239359617233277
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,2,128,1,0,0.05839040279388428
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,16,16384,1,0,0.1490048050880432
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,2,16,1,0,0.05251200199127197
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,2,16384,1,0,0.12975360155105592
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,2,256,1,0,0.06223679780960083
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,512,1,1,0,0.2793215990066528
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,32,1,1,0,0.06518399715423584
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,2,32,1,0,0.05742400288581848
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,2,1,1,0,0.07211520075798035
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,4,1,1,0,0.10798079967498779
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,512,4,1,0,0.2823231935501099
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,2,512,1,0,0.06400319933891296
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,2,1,1,0,0.038761600852012634
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,32,4,1,0,0.06504639983177185
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,2,64,1,0,0.05691199898719788
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,2,4,1,0,0.07241920232772828
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,32,8,1,0,0.06535040140151978
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,2,1,1,0,0.0424703985452652
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,4,4,1,0,0.333459210395813
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,2,1024,1,0,0.06486080288887024
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,512,8,1,0,0.2821824073791504
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,2,4,1,0,0.038601601123809816
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,2,128,1,0,0.05640000104904175
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,2,8,1,0,0.07192000150680541
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,32,16,1,0,0.06559680104255676
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,2,4,1,0,0.04227519929409027
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,4,8,1,0,0.10823680162429809
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,2,2048,1,0,0.06558079719543457
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,2,8,1,0,0.038764798641204835
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,2,256,1,0,0.05578879714012146
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,512,16,1,0,0.28123838901519777
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,2,16,1,0,0.0725600004196167
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,32,32,1,0,0.07711679935455322
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,2,8,1,0,0.04235199987888336
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,4,16,1,0,0.10894399881362915
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,2,4096,1,0,0.06816959977149964
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,2,16,1,0,0.042243200540542605
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,32,64,1,0,0.07736319899559022
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,2,16,1,0,0.038889598846435544
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,2,8192,1,0,0.07184640169143677
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,2,512,1,0,0.05828160047531128
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,2,32,1,0,0.07269759774208069
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,512,32,1,0,0.28317439556121826
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,4,32,1,0,0.1087839961051941
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,2,32,1,0,0.04226559996604919
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,32,128,1,0,0.07933760285377503
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,2,32,1,0,0.04182080030441284
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,512,64,1,0,0.29380478858947756
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,2,16384,1,0,0.07696639895439147
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,2,1024,1,0,0.05875840187072754
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,2,64,1,0,0.07438719868659974
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,4,64,1,0,0.10967999696731567
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,2,64,1,0,0.04391680061817169
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,32,256,1,0,0.08120319843292237
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,512,128,1,0,0.3086623907089233
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,2,64,1,0,0.04056960046291351
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,4,1,1,0,0.05498239994049072
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,2,2048,1,0,0.060550397634506224
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,2,128,1,0,0.07574399709701538
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,4,128,1,0,0.11172800064086914
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,2,128,1,0,0.04565120041370392
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,32,512,1,0,0.0807807981967926
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,2,128,1,0,0.04066239893436432
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,4,4,1,0,0.055238401889801024
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,2,4096,1,0,0.06360960006713867
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,2,256,1,0,0.07874240279197693
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,512,256,1,0,0.33986239433288573
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,4,256,1,0,0.1164031982421875
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,2,256,1,0,0.046003198623657225
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,32,1024,1,0,0.09203839898109437
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,2,256,1,0,0.04149119853973389
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,4,8,1,0,0.054662400484085084
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,2,8192,1,0,0.06824319958686828
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,2,512,1,0,0.08090239763259888
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,4,512,1,0,0.11797440052032471
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,2,512,1,0,0.04768959879875183
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,2,512,1,0,0.04379200041294098
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,32,2048,1,0,0.10158079862594604
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,4,16,1,0,0.054236799478530884
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,2,16384,1,0,0.07832000255584717
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,512,512,1,0,0.39924159049987795
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,2,1024,1,0,0.08127679824829101
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,4,1024,1,0,0.11956160068511963
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,2,1024,1,0,0.048416000604629514
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,2,1024,1,0,0.043756800889968875
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,4,32,1,0,0.05493119955062866
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,4,1,1,0,0.051785600185394284
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,32,4096,1,0,0.1204640030860901
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,2,2048,1,0,0.082777601480484
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,4,2048,1,0,0.12234560251235962
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,2,2048,1,0,0.04933759868144989
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,2,2048,1,0,0.04561919867992401
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,1024,1,1,0,0.5122464179992676
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,4,64,1,0,0.0576416015625
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,4,4,1,0,0.05222079753875732
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,64,1,1,0,0.0669983983039856
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,2,4096,1,0,0.08496000170707703
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,4,4096,1,0,0.1268928050994873
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,2,4096,1,0,0.05225920081138611
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,2,4096,1,0,0.049795201420783995
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,4,128,1,0,0.058736002445220946
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,4,8,1,0,0.05241919755935669
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,64,4,1,0,0.06731839776039124
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,2,8192,1,0,0.09012799859046935
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,1024,4,1,0,0.5154208183288574
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,4,8192,1,0,0.13512320518493653
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,2,8192,1,0,0.058457601070404056
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,2,16384,1,0,0.0955839991569519
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,2,8192,1,0,0.057014399766922
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,4,256,1,0,0.06264640092849731
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,4,16,1,0,0.05240960121154785
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,64,8,1,0,0.06700479984283447
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,4,16384,1,0,0.15043840408325196
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,2,16384,1,0,0.06848319768905639
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,4,1,1,0,0.07311360239982605
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,1024,8,1,0,0.5169951915740967
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,2,16384,1,0,0.07081279754638672
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,4,512,1,0,0.06470080018043518
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,4,32,1,0,0.05848000049591064
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,64,16,1,0,0.06735680103302003
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,8,1,1,0,0.10054399967193603
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,4,1,1,0,0.04269759953022003
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,4,4,1,0,0.07305279970169068
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,4,1,1,0,0.03919360041618347
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,4,1024,1,0,0.06575999855995178
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,4,64,1,0,0.056959998607635495
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,1024,16,1,0,0.5157472133636475
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,64,32,1,0,0.06740800142288209
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,4,4,1,0,0.04245119988918304
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,8,4,1,0,0.10130239725112915
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,4,8,1,0,0.07361599802970886
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,4,4,1,0,0.03915199935436249
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,4,2048,1,0,0.06711999773979187
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,4,128,1,0,0.056908798217773435
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,64,64,1,0,0.06912320256233215
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,4,8,1,0,0.04264000058174133
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,8,8,1,0,0.10145280361175538
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,4,16,1,0,0.07336959838867188
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,4,8,1,0,0.03930880129337311
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,1024,32,1,0,0.5174560070037841
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,4,4096,1,0,0.0705024003982544
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,4,256,1,0,0.05805119872093201
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,4,16,1,0,0.03938240110874176
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,8,16,1,0,0.10212479829788208
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,4,16,1,0,0.042895999550819394
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,4,32,1,0,0.07270399928092956
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,64,128,1,0,0.07114560008049012
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,4,8192,1,0,0.07571520209312439
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,4,512,1,0,0.0581279993057251
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,4,32,1,0,0.041782400012016295
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,4,32,1,0,0.04293439984321594
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,4,64,1,0,0.07516480088233948
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,8,32,1,0,0.1017408013343811
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,4,16384,1,0,0.0852832019329071
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,64,256,1,0,0.08612800240516663
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,4,1024,1,0,0.05909759998321533
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,4,64,1,0,0.04118080139160156
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,1024,64,1,0,0.5408192157745362
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,4,64,1,0,0.04434239864349365
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,4,2048,1,0,0.062636798620224
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,8,64,1,0,0.10340800285339355
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,4,128,1,0,0.040803200006484984
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,4,128,1,0,0.07616320252418518
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,8,1,1,0,0.052527999877929686
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,4,128,1,0,0.04600639939308167
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,4,4096,1,0,0.06534079909324646
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,64,512,1,0,0.09702079892158508
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,4,256,1,0,0.04211199879646301
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,4,256,1,0,0.07947199940681457
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,8,128,1,0,0.10496959686279297
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,8,4,1,0,0.0515999972820282
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,1024,128,1,0,0.5680992126464843
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,4,256,1,0,0.046342399716377256
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,4,512,1,0,0.04203200042247772
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,4,8192,1,0,0.07213119864463806
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,8,8,1,0,0.05242239832878113
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,4,512,1,0,0.08109120130538941
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,64,1024,1,0,0.11110399961471558
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,8,256,1,0,0.11091840267181396
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,4,512,1,0,0.0482015997171402
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,4,1024,1,0,0.04368320107460022
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,4,16384,1,0,0.08225280046463013
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,8,16,1,0,0.05276479721069336
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,8,1,1,0,0.050547200441360476
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,4,2048,1,0,0.04728319942951202
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,8,512,1,0,0.11256320476531982
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,64,2048,1,0,0.1296704053878784
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,4,1024,1,0,0.0488864004611969
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,128,1024,256,1,0,0.6317215919494629
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,4,1024,1,0,0.08183680176734924
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,8,32,1,0,0.05366399884223938
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,4,4096,1,0,0.053491199016571046
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,8,4,1,0,0.05124160051345825
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,8,1024,1,0,0.11558079719543457
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,4,2048,1,0,0.05139200091361999
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,8,64,1,0,0.0553056001663208
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,4,8192,1,0,0.0630240023136139
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,64,4096,1,0,0.1623520016670227
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,4,2048,1,0,0.08526080250740051
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,4,4096,1,0,0.05747519731521607
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,8,8,1,0,0.05140159726142883
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,8,128,1,0,0.057014399766922
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,8,2048,1,0,0.12041280269622803
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,4,16384,1,0,0.07320640087127686
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,4,4096,1,0,0.08861119747161865
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,4,8192,1,0,0.06747519969940186
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,8,16,1,0,0.05117440223693848
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,128,1,1,0,0.07490559816360473
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,8,256,1,0,0.062009602785110474
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,8,1,1,0,0.037887999415397645
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,8,4096,1,0,0.12836480140686035
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,8,512,1,0,0.06338239908218384
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,8,32,1,0,0.057574397325515746
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,128,4,1,0,0.07532479763031005
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,4,8192,1,0,0.09390720129013061
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,8,4,1,0,0.03806079924106598
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,4,16384,1,0,0.07569599747657776
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,8,64,1,0,0.05575360059738159
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,8,1024,1,0,0.0648863971233368
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,8,8192,1,0,0.14215999841690063
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,128,8,1,0,0.07559679746627808
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,8,8,1,0,0.03810240030288696
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,4,16384,1,0,0.1038591980934143
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,8,2048,1,0,0.06742079854011536
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,8,1,1,0,0.040870401263237
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,8,128,1,0,0.05668479800224304
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,8,1,1,0,0.06937599778175355
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,8,16384,1,0,0.16840959787368776
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,8,16,1,0,0.03823040127754211
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,128,16,1,0,0.07526400089263915
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,8,4,1,0,0.040940800309181215
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,8,4096,1,0,0.07342399954795838
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,8,256,1,0,0.05683839917182922
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,8,4,1,0,0.0692351996898651
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,8,32,1,0,0.040992000699043275
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,8,8,1,0,0.04110400080680847
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,16,1,1,0,0.10191680192947387
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,8,512,1,0,0.058550399541854856
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,8,8192,1,0,0.08208640217781067
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,128,32,1,0,0.07564799785614014
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,8,8,1,0,0.06932479739189149
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,8,64,1,0,0.03959360122680664
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,8,16,1,0,0.04106239974498749
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,16,4,1,0,0.10176960229873658
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,8,32,1,0,0.041094401478767396
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,8,1024,1,0,0.06111360192298889
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,8,16,1,0,0.069513601064682
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,8,16384,1,0,0.1002303957939148
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,8,128,1,0,0.039900800585746764
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,128,64,1,0,0.07722880244255066
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,16,1,1,0,0.051926398277282716
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,16,8,1,0,0.10181119441986083
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,8,64,1,0,0.042691200971603394
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,8,32,1,0,0.06902400255203248
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,8,256,1,0,0.039689600467681885
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,16,4,1,0,0.05201600193977356
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,8,2048,1,0,0.06176000237464905
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,128,128,1,0,0.07834240198135375
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,8,128,1,0,0.04402559995651245
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,16,16,1,0,0.10221120119094848
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,8,64,1,0,0.07113919854164123
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,8,512,1,0,0.041791999340057374
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,8,4096,1,0,0.07225279808044434
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,16,8,1,0,0.05294719934463501
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,16,32,1,0,0.10223040580749512
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,128,256,1,0,0.1112287998199463
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,8,256,1,0,0.045433598756790164
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,8,128,1,0,0.07314559817314148
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,8,1024,1,0,0.044924798607826236
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,8,8192,1,0,0.0806656002998352
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,16,16,1,0,0.05284479856491089
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,16,64,1,0,0.1039423942565918
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,8,512,1,0,0.0474047988653183
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,128,512,1,0,0.13078080415725707
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,16,32,1,0,0.054895997047424316
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,8,2048,1,0,0.04947839975357056
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,8,256,1,0,0.07746559977531434
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,8,16384,1,0,0.09993919730186462
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,16,128,1,0,0.1059008002281189
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,8,1024,1,0,0.049744001030921935
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,128,1024,1,0,0.14971200227737427
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,16,64,1,0,0.055478399991989134
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,16,1,1,0,0.05003839731216431
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,8,512,1,0,0.07908480167388916
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,8,4096,1,0,0.06269760131835937
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,16,256,1,0,0.11502399444580078
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,8,2048,1,0,0.05422400236129761
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,16,128,1,0,0.05738239884376526
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,16,4,1,0,0.05008640289306641
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,8,1024,1,0,0.08156480193138123
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,16,512,1,0,0.11844160556793212
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,128,2048,1,0,0.18483519554138184
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,16,1024,1,0,0.12297279834747314
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,8,4096,1,0,0.06557440161705017
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,8,8192,1,0,0.07174400091171265
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,16,8,1,0,0.05020800232887268
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,16,256,1,0,0.06285439729690552
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,8,2048,1,0,0.08516799807548522
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,256,1,1,0,0.09564480185508728
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,16,2048,1,0,0.13016639947891234
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,8,16384,1,0,0.08826559782028198
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,16,512,1,0,0.06548159718513488
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,8,8192,1,0,0.07377920150756836
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,16,16,1,0,0.050569599866867064
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,8,4096,1,0,0.09100800156593322
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,16,1,1,0,0.037894400954246524
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,256,4,1,0,0.09644160270690919
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,16,1024,1,0,0.06763839721679688
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,8,8192,1,0,0.10082240104675293
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,16,4096,1,0,0.1435744047164917
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,16,32,1,0,0.057145601511001586
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,8,16384,1,0,0.09235519766807557
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,16,4,1,0,0.03834240138530731
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,256,8,1,0,0.0970687985420227
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,16,2048,1,0,0.0732159972190857
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,8,16384,1,0,0.11812800168991089
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,16,64,1,0,0.055276799201965335
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,256,16,1,0,0.09705280065536499
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,16,8192,1,0,0.16801919937133789
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,16,1,1,0,0.04147520065307617
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,16,8,1,0,0.03792960047721863
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,16,4096,1,0,0.08274239897727967
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,16,1,1,0,0.06925439834594727
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,16,128,1,0,0.05619199872016907
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,256,32,1,0,0.09804480075836182
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,16,4,1,0,0.041382399201393125
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,16,16,1,0,0.0380511999130249
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,16,16384,1,0,0.22211840152740478
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,16,4,1,0,0.06928319931030273
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,16,8192,1,0,0.10089919567108155
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,16,256,1,0,0.05811200141906738
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,16,8,1,0,0.04145599901676178
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,256,64,1,0,0.10191680192947387
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,16,32,1,0,0.040406399965286256
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,32,1,1,0,0.09770240187644959
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,16,8,1,0,0.0689408004283905
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,16,512,1,0,0.059683197736740114
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,16,16,1,0,0.04139519929885864
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,16,16384,1,0,0.13645440340042114
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,16,64,1,0,0.04057919979095459
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,256,128,1,0,0.10642240047454835
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,32,4,1,0,0.10082880258560181
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,16,16,1,0,0.06957119703292847
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,16,1024,1,0,0.061337602138519284
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,16,32,1,0,0.041791999340057374
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,32,1,1,0,0.05381119847297668
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,16,128,1,0,0.04037440121173859
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,32,8,1,0,0.09773759841918946
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,16,32,1,0,0.06939200162887574
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,256,256,1,0,0.11856319904327392
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,16,2048,1,0,0.06934720277786255
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,16,64,1,0,0.043068799376487735
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,32,4,1,0,0.05475839972496033
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,16,256,1,0,0.04170880019664765
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,32,16,1,0,0.10169600248336792
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,16,64,1,0,0.07142720222473145
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,16,128,1,0,0.044486400485038755
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,16,4096,1,0,0.07880640029907227
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,32,8,1,0,0.05441920161247253
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,256,512,1,0,0.137990403175354
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,16,512,1,0,0.044521600008010864
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,32,32,1,0,0.10165760517120362
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,16,128,1,0,0.07360000014305115
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,16,256,1,0,0.04679040014743805
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,32,16,1,0,0.05469759702682495
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,16,8192,1,0,0.09687680006027222
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,16,1024,1,0,0.048630398511886594
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,32,64,1,0,0.1026304006576538
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,16,256,1,0,0.08021759986877441
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,16,512,1,0,0.049449598789215087
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,256,1024,1,0,0.17401599884033203
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,32,32,1,0,0.05488319993019104
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,16,2048,1,0,0.06102399826049805
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,32,128,1,0,0.10480959415435791
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,16,512,1,0,0.08216959834098816
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,16,16384,1,0,0.1309664011001587
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,16,1024,1,0,0.054364800453186035
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,32,64,1,0,0.0568992018699646
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,512,1,1,0,0.15958720445632935
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,16,4096,1,0,0.07049599885940552
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,32,256,1,0,0.12071039676666259
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,16,1024,1,0,0.0852735996246338
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,32,1,1,0,0.0512287974357605
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,16,2048,1,0,0.06426560282707214
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,32,128,1,0,0.05805439949035644
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,512,4,1,0,0.16092480421066285
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,16,8192,1,0,0.08762879967689514
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,32,512,1,0,0.12396800518035889
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,16,2048,1,0,0.09066560268402099
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,32,4,1,0,0.050633597373962405
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,16,4096,1,0,0.07427520155906678
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,32,256,1,0,0.06658880114555359
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,32,1024,1,0,0.131004798412323
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,512,8,1,0,0.16206719875335693
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,32,8,1,0,0.05055680274963379
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,16,4096,1,0,0.10060160160064698
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,16,16384,1,0,0.12271039485931397
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,32,512,1,0,0.06889920234680176
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,16,8192,1,0,0.09226239919662475
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,32,2048,1,0,0.14487680196762084
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,32,16,1,0,0.05095999836921692
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,512,16,1,0,0.16318080425262452
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,32,1,1,0,0.03854399919509888
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,16,8192,1,0,0.11769280433654786
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,32,1024,1,0,0.07402560114860535
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,32,32,1,0,0.05753920078277588
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,32,4096,1,0,0.17258880138397217
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,16,16384,1,0,0.12716480493545532
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,32,4,1,0,0.038022398948669434
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,512,32,1,0,0.16517119407653807
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,32,64,1,0,0.05725439786911011
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,32,2048,1,0,0.08320000171661376
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,64,1,1,0,0.09970560073852539
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,32,8,1,0,0.0385343998670578
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,32,1,1,0,0.04188799858093262
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,16,16384,1,0,0.15246399641036987
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,32,128,1,0,0.05880320072174072
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,512,64,1,0,0.17089920043945311
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,64,4,1,0,0.10045759677886963
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,32,16,1,0,0.03825600147247314
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,32,4096,1,0,0.10113600492477418
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,32,4,1,0,0.042345601320266726
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,32,256,1,0,0.06076480150222778
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,32,1,1,0,0.06983680129051209
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,64,8,1,0,0.10062400102615357
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,32,32,1,0,0.04147520065307617
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,512,128,1,0,0.17979199886322023
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,64,1,1,0,0.05636159777641296
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,32,8,1,0,0.042499199509620667
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,32,512,1,0,0.06260160207748414
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,32,64,1,0,0.04199039936065674
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,32,16,1,0,0.04219520092010498
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,64,4,1,0,0.056668800115585324
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,32,1024,1,0,0.07383040189743043
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,512,256,1,0,0.2006688117980957
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,32,4,1,0,0.06977599859237671
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,64,16,1,0,0.10105279684066773
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,32,128,1,0,0.043568000197410583
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,32,32,1,0,0.042559999227523806
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,64,8,1,0,0.05676800012588501
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,32,2048,1,0,0.08248000144958496
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,32,8,1,0,0.07017920017242432
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,64,32,1,0,0.10082240104675293
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,32,256,1,0,0.04598079919815064
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,32,64,1,0,0.04385600090026855
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,32,4096,1,0,0.0997983992099762
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,64,16,1,0,0.05699840188026428
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,512,512,1,0,0.2389120101928711
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,32,16,1,0,0.07001919746398926
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,64,64,1,0,0.10247999429702759
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,64,32,1,0,0.05744959712028504
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,32,512,1,0,0.050019198656082155
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,32,128,1,0,0.04579200148582459
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,64,1,1,0,0.05113599896430969
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,32,32,1,0,0.0704800009727478
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,64,128,1,0,0.10408320426940917
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,64,64,1,0,0.059087997674942015
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,1024,1,1,0,0.28230719566345214
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,32,1024,1,0,0.06295679807662964
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,32,256,1,0,0.05055040121078491
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,64,4,1,0,0.05062400102615357
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,32,64,1,0,0.07161279916763305
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,64,256,1,0,0.11370240449905396
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,64,128,1,0,0.06067519783973694
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,32,2048,1,0,0.0725920021533966
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,32,512,1,0,0.05629119873046875
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,1024,4,1,0,0.28493759632110593
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,64,8,1,0,0.05119680166244507
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,32,128,1,0,0.07407039999961854
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,64,512,1,0,0.11992640495300293
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,64,256,1,0,0.07262399792671204
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,32,1024,1,0,0.06506239771842956
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,32,4096,1,0,0.09024959802627563
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,64,16,1,0,0.05110399723052979
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,32,256,1,0,0.08540480136871338
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,1024,8,1,0,0.2855936050415039
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,64,1024,1,0,0.1334879994392395
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,64,512,1,0,0.0767520010471344
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,32,2048,1,0,0.07446079850196838
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,64,1,1,0,0.03678080141544342
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,64,32,1,0,0.05226879715919495
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,32,512,1,0,0.08779199719429016
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,64,1024,1,0,0.0865887999534607
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,64,2048,1,0,0.15991359949111938
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,1024,16,1,0,0.284006404876709
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,64,4,1,0,0.036601600050926206
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,64,64,1,0,0.053750401735305785
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,32,4096,1,0,0.09237120151519776
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,32,1024,1,0,0.09375360012054443
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,64,2048,1,0,0.10460799932479858
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,64,8,1,0,0.03671680092811584
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,64,4096,1,0,0.21469440460205078
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,64,128,1,0,0.05596479773521423
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,64,1,1,0,0.04174720048904419
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,1024,32,1,0,0.2880415916442871
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,32,2048,1,0,0.1026304006576538
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,64,16,1,0,0.03804160058498383
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,128,1,1,0,0.11905920505523682
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,64,256,1,0,0.06517120003700257
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,64,4096,1,0,0.1397312045097351
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,64,4,1,0,0.04140160083770752
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,64,32,1,0,0.03737280070781708
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,32,4096,1,0,0.12085119485855103
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,128,4,1,0,0.11857279539108276
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,1024,64,1,0,0.2986176013946533
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,64,512,1,0,0.07611200213432312
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,128,1,1,0,0.061926400661468504
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,64,8,1,0,0.041126400232315063
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,64,64,1,0,0.039340800046920775
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,64,1,1,0,0.07143679857254029
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,128,8,1,0,0.1189695954322815
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,64,1024,1,0,0.08906559944152832
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,128,4,1,0,0.06258879899978638
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,64,16,1,0,0.041571199893951416
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,64,128,1,0,0.04320000112056732
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,64,4,1,0,0.07190719842910767
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,1024,128,1,0,0.31655681133270264
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,128,16,1,0,0.11926079988479614
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,128,8,1,0,0.0627615988254547
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,64,2048,1,0,0.10609920024871826
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,64,32,1,0,0.042044800519943235
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,64,256,1,0,0.05193600058555603
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,64,8,1,0,0.07214400172233582
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,128,32,1,0,0.119651198387146
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,128,16,1,0,0.06259520053863525
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,64,64,1,0,0.04403199851512909
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,64,512,1,0,0.06573439836502075
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,64,16,1,0,0.07265920042991639
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,64,4096,1,0,0.1423200011253357
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,128,64,1,0,0.12248320579528808
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,64,1024,256,1,0,0.35594561100006106
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,128,32,1,0,0.06343680024147033
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,64,128,1,0,0.047356799244880676
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,64,1024,1,0,0.07722240090370178
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,64,32,1,0,0.07260479927062988
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,128,1,1,0,0.05681920051574707
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,128,128,1,0,0.12655359506607056
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,128,64,1,0,0.06512960195541381
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,64,256,1,0,0.061267197132110596
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,64,64,1,0,0.07383679747581481
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,64,2048,1,0,0.09382719993591308
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,128,4,1,0,0.05695040225982666
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,128,256,1,0,0.14453760385513306
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,128,128,1,0,0.06831039786338806
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,64,512,1,0,0.06659520268440247
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,64,128,1,0,0.07570559978485107
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,128,8,1,0,0.05940160155296326
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,64,4096,1,0,0.12918080091476442
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,128,256,1,0,0.07855039834976196
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,128,512,1,0,0.15868159532546997
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,64,1024,1,0,0.07649599909782409
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,64,256,1,0,0.09289600253105164
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,128,16,1,0,0.05752639770507813
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,128,1,1,0,0.04221439957618713
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,128,512,1,0,0.08797439932823181
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,128,1024,1,0,0.1857759952545166
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,64,512,1,0,0.0987712025642395
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,64,2048,1,0,0.09413759708404541
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,128,32,1,0,0.05782080292701721
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,128,4,1,0,0.04171839952468872
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,128,1024,1,0,0.10566079616546631
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,64,1024,1,0,0.10769280195236205
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,128,64,1,0,0.05908160209655762
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,128,2048,1,0,0.2388607978820801
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,128,8,1,0,0.04286719858646393
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,64,4096,1,0,0.13041919469833374
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,128,128,1,0,0.06075199842453003
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,64,2048,1,0,0.12498240470886231
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,128,2048,1,0,0.14117759466171265
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,256,1,1,0,0.17144639492034913
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,128,16,1,0,0.04329920113086701
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,128,1,1,0,0.04707840085029602
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,128,256,1,0,0.08236799836158752
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,256,1,1,0,0.07899199724197388
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,256,4,1,0,0.17263360023498536
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,128,32,1,0,0.04320000112056732
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,64,4096,1,0,0.16206719875335693
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,128,4,1,0,0.047705599665641786
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,128,512,1,0,0.10028799772262573
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,256,4,1,0,0.07913920283317566
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,128,1,1,0,0.08142719864845276
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,128,64,1,0,0.046460801362991334
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,256,8,1,0,0.1729920029640198
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,128,8,1,0,0.04849919974803925
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,256,8,1,0,0.0788320004940033
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,128,1024,1,0,0.11826560497283936
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,128,4,1,0,0.0812831997871399
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,128,128,1,0,0.0507423996925354
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,256,16,1,0,0.17365440130233764
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,128,16,1,0,0.048240000009536745
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,256,16,1,0,0.07927680015563965
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,128,2048,1,0,0.15484800338745117
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,128,32,1,0,0.04932160079479218
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,128,8,1,0,0.08102719783782959
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,256,32,1,0,0.17466239929199218
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,128,256,1,0,0.07005119919776917
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,256,32,1,0,0.07928320169448852
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,128,64,1,0,0.05247359871864319
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,128,16,1,0,0.08165439963340759
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,256,1,1,0,0.07109119892120361
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,256,64,1,0,0.18054720163345336
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,128,512,1,0,0.08707839846611024
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,256,64,1,0,0.08384960293769836
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,128,128,1,0,0.05848640203475952
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,128,32,1,0,0.08186879754066467
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,256,4,1,0,0.07172799706459046
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,256,128,1,0,0.18657599687576293
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,128,1024,1,0,0.10583679676055908
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,256,128,1,0,0.08985599875450134
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,128,256,1,0,0.0692575991153717
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,128,64,1,0,0.08344640135765076
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,256,8,1,0,0.07207040190696716
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,256,256,1,0,0.21774721145629883
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,256,256,1,0,0.10861760377883911
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,128,512,1,0,0.07862399816513062
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,128,128,1,0,0.08507519960403442
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,128,2048,1,0,0.14022719860076904
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,256,16,1,0,0.07132480144500733
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,256,512,1,0,0.2459872007369995
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,128,1024,1,0,0.09736319780349731
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,256,512,1,0,0.1278112053871155
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,128,256,1,0,0.09646720290184022
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,256,1,1,0,0.054502397775650024
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,256,32,1,0,0.07269120216369629
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,256,4,1,0,0.055632001161575316
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,128,2048,1,0,0.1322111964225769
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,128,512,1,0,0.10740159749984741
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,256,1024,1,0,0.29939520359039307
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,256,64,1,0,0.07597119808197021
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,256,1024,1,0,0.1644927978515625
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,256,8,1,0,0.05662400126457214
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,256,1,1,0,0.0616096019744873
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,128,1024,1,0,0.1251904010772705
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,256,128,1,0,0.08064640164375306
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,512,1,1,0,0.2963360071182251
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,512,1,1,0,0.11599040031433105
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,256,16,1,0,0.05740799903869629
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,256,4,1,0,0.06259840130805969
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,256,256,1,0,0.09239360094070434
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,512,4,1,0,0.29758079051971437
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,512,4,1,0,0.11668800115585327
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,128,2048,1,0,0.16209919452667237
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,256,32,1,0,0.05931839942932129
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,256,8,1,0,0.06208320260047913
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,256,512,1,0,0.11159679889678956
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,512,8,1,0,0.1167296051979065
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,512,8,1,0,0.2982048034667969
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,256,1,1,0,0.10387519598007203
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,256,64,1,0,0.062352001667022705
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,256,16,1,0,0.06370559930801392
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,512,16,1,0,0.11745599508285523
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,512,16,1,0,0.3009727954864502
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,256,4,1,0,0.10425920486450195
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,256,1024,1,0,0.1473855972290039
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,256,128,1,0,0.06753919720649719
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,256,32,1,0,0.06476799845695495
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,512,32,1,0,0.11883519887924195
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,256,8,1,0,0.10432959794998169
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,512,32,1,0,0.3015104055404663
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,256,64,1,0,0.07096959948539734
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,512,1,1,0,0.10360959768295289
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,256,256,1,0,0.0787231981754303
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,512,64,1,0,0.1279327988624573
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,256,16,1,0,0.10411200523376465
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,256,128,1,0,0.07710400223731995
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,512,4,1,0,0.10386240482330322
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,512,64,1,0,0.31138560771942136
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,256,512,1,0,0.09763839840888977
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,256,32,1,0,0.10451200008392333
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,512,128,1,0,0.1384511947631836
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,256,256,1,0,0.09343360066413879
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,512,8,1,0,0.10501439571380615
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,256,64,1,0,0.10831680297851562
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,512,128,1,0,0.32318079471588135
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,256,1024,1,0,0.13299839496612548
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,512,256,1,0,0.1711199998855591
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,256,512,1,0,0.11180800199508667
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,512,16,1,0,0.10618560314178467
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,256,128,1,0,0.11444799900054932
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,512,1,1,0,0.08172479867935181
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,512,256,1,0,0.3814336061477661
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,512,32,1,0,0.10949759483337403
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,256,256,1,0,0.13701119422912597
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,512,4,1,0,0.08239039778709412
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,256,1024,1,0,0.14728319644927979
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,512,512,1,0,0.20832960605621337
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,512,64,1,0,0.11609280109405518
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,512,8,1,0,0.08269760012626648
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,256,512,1,0,0.15667200088500977
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,512,1,1,0,0.09469760060310364
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,512,512,1,0,0.43665599822998047
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,512,16,1,0,0.08386560082435608
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,512,128,1,0,0.12644799947738647
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,1024,1,1,0,0.1958847999572754
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,512,4,1,0,0.09530240297317505
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,512,32,1,0,0.08578559756278992
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,256,1024,1,0,0.19338560104370117
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,1024,1,1,0,0.5329440116882325
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,512,256,1,0,0.14557759761810302
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,512,8,1,0,0.09511039853096008
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,1024,4,1,0,0.1968991994857788
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,512,64,1,0,0.09288319945335388
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,512,1,1,0,0.17229759693145752
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,512,16,1,0,0.09568639993667602
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,1024,4,1,0,0.540553617477417
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,1024,8,1,0,0.19748159646987914
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,512,512,1,0,0.1831488013267517
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,512,4,1,0,0.17233920097351074
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,512,128,1,0,0.10290559530258178
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,512,32,1,0,0.09688000082969665
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,1024,8,1,0,0.5457119941711426
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,1024,16,1,0,0.19814720153808593
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,512,8,1,0,0.17342720031738282
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,1024,1,1,0,0.1804352045059204
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,512,64,1,0,0.105132794380188
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,512,256,1,0,0.12162560224533081
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,512,16,1,0,0.17402880191802977
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,1024,16,1,0,0.5482624053955079
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,1024,32,1,0,0.20114240646362305
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,1024,4,1,0,0.18273600339889526
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,512,128,1,0,0.11512960195541382
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,512,32,1,0,0.17583999633789063
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,512,512,1,0,0.15973759889602662
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,1024,32,1,0,0.5503744125366211
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,1024,64,1,0,0.2150752067565918
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,1024,8,1,0,0.18426560163497924
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,512,64,1,0,0.1822208046913147
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,512,256,1,0,0.1440991997718811
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,1024,1,1,0,0.12565120458602905
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,1024,64,1,0,0.5644864082336426
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,1024,128,1,0,0.23169279098510742
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,512,128,1,0,0.19131200313568114
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,1024,16,1,0,0.18756159543991088
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,512,512,1,0,0.18133120536804198
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,1024,4,1,0,0.12712960243225097
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,512,256,1,0,0.23119680881500243
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,1024,128,1,0,0.5898655891418457
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,1024,32,1,0,0.1907520055770874
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,32,1024,256,1,0,0.2897536039352417
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,1024,1,1,0,0.14073599576950074
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,1024,8,1,0,0.12893439531326295
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,512,512,1,0,0.2703007936477661
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,1024,64,1,0,0.19975359439849855
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,1024,4,1,0,0.14198720455169678
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,1024,16,1,0,0.13181760311126708
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,128,1024,256,1,0,0.7038623809814453
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,1024,1,1,0,0.29926400184631347
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,1024,8,1,0,0.14320000410079955
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,1024,128,1,0,0.21744959354400634
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,1024,32,1,0,0.13706560134887696
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,1024,4,1,0,0.3010432004928589
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,1024,16,1,0,0.14515520334243776
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,1024,64,1,0,0.14833920001983641
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,1024,8,1,0,0.30062079429626465
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,32,1024,256,1,0,0.25614080429077146
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,1024,32,1,0,0.1472991943359375
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,1024,128,1,0,0.1653599977493286
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,1024,16,1,0,0.3031775951385498
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,1024,64,1,0,0.16232320070266723
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,1024,32,1,0,0.304640007019043
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,flashinfer,fp8_block,fp8,16,1024,256,1,0,0.2038815975189209
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,1024,128,1,0,0.17986880540847777
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,1024,64,1,0,0.31610560417175293
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,16,1024,256,1,0,0.23063359260559083
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,1024,128,1,0,0.33183679580688474
SGLang,0.5.6.post2,NVIDIA H200,mla_generation,fa3,fp8_block,fp8,64,1024,256,1,0,0.4056992053985596
