framework,version,device,op_name,kernel_source,mla_dtype,kv_cache_dtype,num_heads,batch_size,isl,tp_size,step,latency
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,16,1,0,0.01249760016798973
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,16,2,0,0.012675200402736665
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,16,2,0,0.01459839940071106
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,16,4,0,0.01462559998035431
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,16,1,0,0.01653759926557541
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,16,8,0,0.014511999487876893
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,16,4,0,0.012558400630950928
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,16,16,0,0.015272000432014465
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,16,32,0,0.014632000029087067
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,1,16,64,0,0.016441600024700166
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,16,8,0,0.012505599856376648
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,32,1,0,0.013091200590133667
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,32,2,0,0.014486399292945863
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,32,4,0,0.012489599734544754
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,32,8,0,0.01250240057706833
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,32,16,0,0.012484800070524216
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,32,32,0,0.012505599856376648
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,1,32,64,0,0.012465599924325943
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,32,1,0,0.01858240067958832
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,32,2,0,0.016686399281024934
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,32,4,0,0.015204800665378571
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,32,8,0,0.01663679927587509
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,32,16,0,0.01658080071210861
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,64,1,0,0.01660960018634796
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,32,32,0,0.016531200706958772
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,64,2,0,0.014496000111103058
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,1,32,64,0,0.015427200496196747
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,64,32,0,0.01276639997959137
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,16,16,0,0.01252480000257492
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,64,4,0,0.014582400023937226
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,64,8,0,0.01451359987258911
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,1,64,64,0,0.012459199875593185
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,64,16,0,0.0144896000623703
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,64,2,0,0.016638399660587312
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,64,1,0,0.018721599876880646
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,64,4,0,0.016758400201797485
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,64,16,0,0.016519999504089354
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,64,8,0,0.016676799952983858
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,16,32,0,0.012604799866676331
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,64,32,0,0.01659200042486191
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,128,4,0,0.014860799908638
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,1,64,64,0,0.01655679941177368
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,128,8,0,0.01454080045223236
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,128,1,0,0.016683200001716615
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,128,2,0,0.015505599975585937
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,1,128,64,0,0.014473600685596466
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,128,16,0,0.014539200067520141
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,128,32,0,0.01448799967765808
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,128,1,0,0.0207056000828743
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,128,2,0,0.018569600582122803
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,128,16,0,0.01693120002746582
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,128,4,0,0.016684800386428833
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,1,16,64,0,0.011468800157308579
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,128,8,0,0.018639999628067016
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,128,32,0,0.01661919951438904
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,1,128,64,0,0.01666239947080612
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,256,4,0,0.018593600392341612
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,256,1,0,0.023038400709629057
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,256,2,0,0.020665599405765532
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,256,8,0,0.017900800704956053
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,1,256,64,0,0.018632000684738158
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,256,16,0,0.016607999801635742
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,256,4,0,0.02292959988117218
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,256,1,0,0.02879680097103119
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,256,32,0,0.01868640035390854
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,256,2,0,0.022852799296379088
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,256,8,0,0.02109760046005249
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,1,256,64,0,0.019089600443840025
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,256,16,0,0.021196800470352172
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,256,32,0,0.020828799903392793
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,512,2,0,0.028808000683784484
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,512,1,0,0.033030399680137636
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,512,8,0,0.02276639938354492
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,512,2,0,0.030857598781585692
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,512,32,0,0.022651199996471406
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,512,4,0,0.024868799746036528
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,512,16,0,0.022707200050354003
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,1,512,64,0,0.020894399285316466
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,512,8,0,0.026804798841476442
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,512,32,0,0.024673600494861603
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,1,512,64,0,0.02480800002813339
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,512,1,0,0.035902398824691775
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,512,16,0,0.02486239969730377
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,512,4,0,0.026855999231338502
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,1024,1,0,0.064792001247406
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,1024,2,0,0.041140800714492796
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,1024,4,0,0.0349727988243103
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,1024,8,0,0.030958399176597595
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,1,1024,64,0,0.028222399950027465
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,1024,16,0,0.02898080050945282
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,1024,32,0,0.029123198986053467
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,1024,1,0,0.0673695981502533
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,1024,2,0,0.04328640103340149
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,1024,4,0,0.03710240125656128
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,1,1024,64,0,0.031196799874305726
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,1024,32,0,0.03292959928512573
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,1024,8,0,0.035097599029541016
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,1024,16,0,0.03311519920825958
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,1536,1,0,0.10024319887161255
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,1536,2,0,0.06439039707183838
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,1536,32,0,0.03712800145149231
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,1536,4,0,0.04521119892597199
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,1536,1,0,0.10496959686279297
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,1536,8,0,0.039473599195480345
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,1536,2,0,0.06623839735984802
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,1536,16,0,0.039059200882911684
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,1,1536,64,0,0.037003201246261594
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,1536,8,0,0.04319359958171844
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,1536,4,0,0.0486735999584198
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,1536,16,0,0.040166398882865904
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,1536,32,0,0.03915840089321136
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,1,1536,64,0,0.039217600226402284
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,2048,1,0,0.14239519834518433
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,2048,4,0,0.0556768000125885
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,2048,2,0,0.0884656012058258
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,2048,8,0,0.04938240051269531
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,2048,16,0,0.04578399956226349
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,2048,32,0,0.045433598756790164
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,1,2048,64,0,0.04336000084877014
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,2048,2,0,0.09120960235595703
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,2048,1,0,0.1455631971359253
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,2048,4,0,0.05798879861831665
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,2048,8,0,0.053395199775695804
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,2048,16,0,0.04939840137958527
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,2048,32,0,0.04734399914741516
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,1,2048,64,0,0.04742400050163269
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,3072,1,0,0.24277920722961427
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,3072,2,0,0.1425439953804016
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,3072,4,0,0.09241759777069092
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,3072,8,0,0.06744959950447083
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,3072,16,0,0.06302239894866943
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,3072,32,0,0.05973119735717773
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,1,3072,64,0,0.057625597715377806
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,3072,1,0,0.24791998863220216
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,3072,2,0,0.14734239578247071
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,3072,4,0,0.0961296021938324
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,3072,8,0,0.0703279972076416
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,3072,16,0,0.06537759900093079
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,3072,32,0,0.06369280219078063
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,1,3072,64,0,0.061684799194335935
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,4096,1,0,0.3738032102584839
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,4096,2,0,0.21124160289764404
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,4096,4,0,0.13418879508972167
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,4096,8,0,0.08441600203514099
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,4096,16,0,0.07835040092468262
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,4096,32,0,0.07545599937438965
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,1,4096,64,0,0.07267839908599853
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,4096,1,0,0.37780640125274656
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,4096,2,0,0.2147200107574463
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,4096,4,0,0.13774720430374146
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,4096,8,0,0.08905280232429505
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,4096,16,0,0.0824015974998474
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,4096,32,0,0.07811999917030335
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,1,4096,64,0,0.07799999713897705
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,6144,1,0,0.7200416088104248
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,6144,4,0,0.22487359046936034
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,6144,2,0,0.3894223928451538
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,6144,8,0,0.14759520292282105
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,6144,16,0,0.11172480583190918
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,6144,32,0,0.10672160387039184
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,1,6144,64,0,0.10263839960098267
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,6144,2,0,0.39437758922576904
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,6144,8,0,0.15079200267791748
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,6144,16,0,0.11480319499969482
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,6144,4,0,0.23073599338531495
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,6144,32,0,0.10926239490509033
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,1,6144,64,0,0.10591520071029663
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,6144,1,0,0.7238431930541992
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,8192,2,0,0.6246032238006591
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,8192,4,0,0.3514672040939331
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,8192,8,0,0.22077279090881347
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,8192,16,0,0.14442559480667114
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,8192,32,0,0.13590879440307618
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,8192,1,0,1.1926655769348145
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,1,8192,64,0,0.1327903985977173
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,8192,2,0,0.6317791938781738
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,8192,4,0,0.35711040496826174
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,8192,8,0,0.22465438842773439
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,8192,16,0,0.14810400009155272
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,8192,32,0,0.141702401638031
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,8192,1,0,1.1830127716064454
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,1,8192,64,0,0.1380031943321228
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,10240,2,0,0.9176799774169921
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,10240,8,0,0.3034976005554199
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,10240,4,0,0.5042623996734619
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,10240,32,0,0.16773760318756104
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,1,10240,64,0,0.16248639822006225
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,10240,16,0,0.1904960036277771
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,1,10240,64,0,0.1665328025817871
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,10240,32,0,0.1721552014350891
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,10240,16,0,0.19430559873580933
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,10240,8,0,0.3076767921447754
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,10240,1,0,1.7616592407226563
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,10240,2,0,0.9280223846435547
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,10240,4,0,0.5090943813323975
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,10240,1,0,1.769366455078125
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,12288,32,0,0.20213758945465088
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,12288,16,0,0.30206239223480225
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,12288,8,0,0.3953952074050903
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,12288,4,0,0.6859583854675293
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,1,12288,64,0,0.19262720346450807
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,12288,2,0,1.2619215965270996
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,12288,8,0,0.40113282203674316
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,12288,1,0,2.4410608291625975
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,1,12288,64,0,0.19727519750595093
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,12288,2,0,1.2700032234191894
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,12288,16,0,0.25965759754180906
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,12288,32,0,0.20236799716949463
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,12288,4,0,0.7157951831817627
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,16384,32,0,0.2719487905502319
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,16384,16,0,0.4170271873474121
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,16384,4,0,1.1834848403930665
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,16384,8,0,0.6343135833740234
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,12288,1,0,2.4748783111572266
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,1,16384,64,0,0.2518608093261719
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,16384,2,0,2.145484733581543
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,16384,32,0,0.3264672040939331
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,16384,16,0,0.40100321769714353
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,16384,8,0,0.7077775955200195
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,1,16384,64,0,0.3157088041305542
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,16384,4,0,1.1333295822143554
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,16,4,0,0.01462240070104599
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,16,1,0,0.014555199444293976
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,16,16,0,0.014497600495815277
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,16384,2,0,2.1928720474243164
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,2,16,64,0,0.014468799531459808
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,16,8,0,0.012561599910259246
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,16384,1,0,4.268844985961914
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,16,2,0,0.01449120044708252
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,16,32,0,0.01255359947681427
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,16,2,0,0.018688000738620758
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,16,4,0,0.018724800646305086
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,16,8,0,0.02061759978532791
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,16,16,0,0.01658080071210861
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,16,1,0,0.022683200240135194
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,2,16,64,0,0.01664000004529953
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,32,1,0,0.018596799671649934
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,32,2,0,0.014548799395561219
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,16,32,0,0.01456640064716339
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,32,4,0,0.016763199865818024
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,32,8,0,0.01255200058221817
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,32,32,0,0.01451839953660965
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,2,32,64,0,0.014481599628925323
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,32,16,0,0.016624000668525696
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,32,1,0,0.018875199556350707
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,16384,1,0,4.16099853515625
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,32,4,0,0.021118399500846863
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,32,8,0,0.01675360053777695
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,32,16,0,0.016616000235080718
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,32,2,0,0.01672479957342148
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,32,32,0,0.016542400419712066
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,64,1,0,0.018559999763965607
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,64,4,0,0.01658399999141693
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,2,32,64,0,0.016697600483894348
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,64,8,0,0.014694400131702423
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,64,16,0,0.013279999792575835
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,64,2,0,0.018371200561523436
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,64,32,0,0.012505599856376648
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,2,64,64,0,0.014478400349617004
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,64,1,0,0.020656000077724456
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,64,2,0,0.018617600202560425
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,64,8,0,0.016672000288963318
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,64,16,0,0.01666560024023056
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,64,4,0,0.018606400489807128
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,2,64,64,0,0.016547200083732606
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,64,32,0,0.016616000235080718
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,128,1,0,0.022017599642276765
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,128,2,0,0.01666560024023056
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,128,4,0,0.016547200083732606
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,128,8,0,0.01456640064716339
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,128,16,0,0.014582400023937226
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,2,128,64,0,0.01313440054655075
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,128,32,0,0.01465120017528534
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,128,2,0,0.020636799931526183
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,128,4,0,0.018782399594783783
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,128,1,0,0.024876800179481507
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,128,8,0,0.018572799861431122
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,256,1,0,0.02903040051460266
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,128,16,0,0.016865600645542145
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,128,32,0,0.018568000197410582
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,2,128,64,0,0.01661919951438904
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,256,8,0,0.01919520050287247
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,256,2,0,0.024728000164031982
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,256,4,0,0.02067680060863495
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,256,16,0,0.018721599876880646
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,2,256,64,0,0.018641600012779237
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,256,1,0,0.031995201110839845
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,256,32,0,0.018620799481868743
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,256,2,0,0.028513601422309874
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,256,4,0,0.022896000742912294
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,256,8,0,0.022777600586414336
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,256,16,0,0.020640000700950623
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,256,32,0,0.020644800364971162
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,2,256,64,0,0.020891200006008147
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,512,1,0,0.0539247989654541
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,512,4,0,0.028828799724578857
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,512,8,0,0.02481119930744171
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,512,2,0,0.033504000306129454
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,512,16,0,0.022703999280929567
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,512,32,0,0.02189760059118271
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,2,512,64,0,0.02268799990415573
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,512,1,0,0.057206398248672484
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,512,2,0,0.037062400579452516
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,512,4,0,0.030888000130653383
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,512,8,0,0.028507199883461
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,512,32,0,0.02484000027179718
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,512,16,0,0.02553440034389496
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,2,512,64,0,0.025679999589920045
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,1024,1,0,0.10907520055770874
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,1024,2,0,0.06588320136070251
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,1024,4,0,0.041212800145149234
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,1024,8,0,0.03713119924068451
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,1024,16,0,0.03293600082397461
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,2,1024,64,0,0.03096800148487091
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,1024,1,0,0.11362559795379638
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,1024,32,0,0.03108479976654053
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,1024,2,0,0.0688704013824463
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,1024,4,0,0.045239999890327454
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,1024,8,0,0.0394351989030838
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,1024,16,0,0.036817601323127745
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,1024,32,0,0.03306080102920532
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,2,1024,64,0,0.03311359882354736
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,1536,1,0,0.17380479574203492
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,1536,2,0,0.10327520370483398
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,1536,4,0,0.06609600186347961
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,1536,8,0,0.047367998957633974
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,1536,16,0,0.04163520038127899
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,1536,32,0,0.04089759886264801
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,2,1536,64,0,0.039164799451828006
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,1536,1,0,0.17940640449523926
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,1536,2,0,0.10667680501937866
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,1536,4,0,0.0698751986026764
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,1536,8,0,0.04967359900474548
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,1536,16,0,0.04573439955711365
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,1536,32,0,0.043198400735855104
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,2048,8,0,0.05792800188064575
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,2,1536,64,0,0.04331839978694916
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,2048,1,0,0.2518863916397095
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,2048,2,0,0.14488159418106078
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,2048,4,0,0.09238399863243103
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,2048,16,0,0.05189599990844727
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,2048,32,0,0.049414399266242984
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,2,2048,64,0,0.04723840057849884
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,2048,1,0,0.2563663959503174
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,2048,2,0,0.14867839813232422
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,2048,4,0,0.09396479725837707
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,2048,8,0,0.061694401502609256
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,3072,2,0,0.24687840938568115
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,2048,16,0,0.056883198022842404
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,2048,32,0,0.05341119766235351
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,2,2048,64,0,0.05135520100593567
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,3072,1,0,0.45002079010009766
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,3072,4,0,0.14646719694137572
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,3072,8,0,0.09626079797744751
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,3072,16,0,0.07200800180435181
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,3072,32,0,0.06627519726753235
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,2,3072,64,0,0.06369760036468505
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,3072,4,0,0.15116480588912964
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,3072,1,0,0.4556560039520264
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,3072,8,0,0.10076960325241088
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,3072,16,0,0.07432479858398437
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,3072,2,0,0.251476788520813
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,4096,8,0,0.138811194896698
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,3072,32,0,0.06987040042877198
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,2,3072,64,0,0.06788640022277832
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,4096,2,0,0.3784816026687622
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,4096,4,0,0.21709120273590088
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,4096,1,0,0.7101744174957275
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,4096,32,0,0.08423680067062378
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,4096,16,0,0.09100319743156433
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,2,4096,64,0,0.0801360011100769
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,4096,4,0,0.2206592082977295
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,4096,2,0,0.38363358974456785
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,4096,8,0,0.1441215991973877
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,4096,16,0,0.09537760019302369
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,4096,32,0,0.08881120085716247
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,4096,1,0,0.7166495800018311
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,2,4096,64,0,0.0841871976852417
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,6144,8,0,0.23520159721374512
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,6144,4,0,0.3971280097961426
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,6144,16,0,0.15722399950027466
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,6144,32,0,0.12073919773101807
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,6144,2,0,0.727127981185913
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,2,6144,64,0,0.11490080356597901
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,6144,1,0,1.407686424255371
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,6144,4,0,0.4027184009552002
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,6144,8,0,0.2418976068496704
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,6144,16,0,0.16087199449539186
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,6144,2,0,0.7327295780181885
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,2,6144,64,0,0.11909760236740112
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,6144,32,0,0.12549279928207396
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,6144,1,0,1.4004976272583007
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,8192,8,0,0.3627104043960571
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,8192,16,0,0.2322160005569458
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,8192,32,0,0.15456160306930541
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,8192,4,0,0.6363984107971191
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,8192,2,0,1.1934864044189453
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,2,8192,64,0,0.14839680194854737
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,8192,8,0,0.36792640686035155
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,8192,16,0,0.24085919857025145
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,8192,32,0,0.16435840129852294
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,8192,4,0,0.6429840087890625
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,8192,2,0,1.1983983993530274
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,8192,1,0,2.336599922180176
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,2,8192,64,0,0.15251840353012086
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,10240,32,0,0.20502080917358398
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,10240,16,0,0.3972032070159912
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,10240,8,0,0.5452256202697754
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,8192,1,0,2.336952018737793
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,2,10240,64,0,0.1819216012954712
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,10240,4,0,0.9334783554077148
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,10240,16,0,0.3263823986053467
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,10240,2,0,1.7610464096069336
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,10240,8,0,0.5244783878326416
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,10240,32,0,0.2104975938796997
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,2,10240,64,0,0.22206399440765381
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,10240,4,0,1.0656463623046875
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,10240,2,0,1.7768144607543945
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,10240,1,0,3.5363983154296874
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,12288,8,0,0.7382207870483398
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,12288,16,0,0.4638815879821777
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,12288,32,0,0.3231695890426636
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,12288,4,0,1.2949935913085937
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,2,12288,64,0,0.24055039882659912
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,10240,1,0,3.5387504577636717
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,12288,8,0,0.7216976165771485
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,12288,2,0,2.4950944900512697
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,12288,32,0,0.2865967988967896
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,12288,4,0,1.3068047523498536
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,2,12288,64,0,0.303056001663208
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,12288,16,0,0.4891808032989502
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,12288,2,0,2.631604766845703
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,16384,8,0,1.152188777923584
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,16384,16,0,0.6870831966400146
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,12288,1,0,5.026776123046875
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,16384,32,0,0.45642719268798826
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,2,16384,64,0,0.2851423978805542
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,16384,4,0,2.2470863342285154
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,16384,8,0,1.1746224403381347
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,12288,1,0,5.124380874633789
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,16384,16,0,0.7647439956665039
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,16,1,0,0.022150400280952453
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,16384,32,0,0.45192642211914064
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,16,2,0,0.016859200596809388
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,2,16384,64,0,0.3710031986236572
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,16,4,0,0.014670400321483612
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,16,8,0,0.01655679941177368
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,16,32,0,0.012483199685811996
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,16384,4,0,2.364366340637207
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,16,16,0,0.016681599617004394
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,16384,2,0,4.4949390411376955
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,4,16,64,0,0.014617599546909332
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,16,1,0,0.022755199670791627
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,16,2,0,0.01900160014629364
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,16,4,0,0.020660799741744996
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,16,8,0,0.02274720072746277
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,16,16,0,0.01656640022993088
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,16,32,0,0.016708800196647645
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,4,16,64,0,0.01871519982814789
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,32,1,0,0.024864000082015992
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,16384,2,0,4.442391967773437
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,32,32,0,0.016550399363040924
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,32,2,0,0.02054239958524704
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,32,4,0,0.014563199877738953
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,32,8,0,0.014678399264812469
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,32,16,0,0.01446560025215149
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,4,32,64,0,0.016548800468444824
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,32,8,0,0.016603200137615202
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,32,1,0,0.030979201197624207
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,32,4,0,0.016816000640392303
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,32,2,0,0.018727999925613404
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,32,16,0,0.01669120043516159
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,32,32,0,0.02075839936733246
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,4,32,64,0,0.015356799960136414
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,64,1,0,0.02492000013589859
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,64,2,0,0.016995200514793397
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,64,8,0,0.014692799746990204
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,16384,1,0,9.039059448242188
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,64,16,0,0.01658879965543747
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,64,4,0,0.015911999344825744
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,64,4,0,0.024702399969100952
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,64,32,0,0.014601600170135499
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,4,64,64,0,0.012488000094890594
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,64,1,0,0.027249601483345032
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,64,2,0,0.020744000375270844
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,64,8,0,0.01860480010509491
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,64,32,0,0.021667200326919555
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,128,8,0,0.016607999801635742
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,4,64,64,0,0.01661760061979294
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,128,1,0,0.031097599864006044
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,64,16,0,0.01733759939670563
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,128,2,0,0.022598400712013245
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,128,4,0,0.016760000586509706
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,16384,1,0,8.874014282226563
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,128,16,0,0.018748800456523895
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,128,32,0,0.014803199470043183
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,4,128,64,0,0.014537599682807923
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,128,1,0,0.0350383996963501
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,128,2,0,0.024931199848651886
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,128,4,0,0.021236799657344818
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,128,8,0,0.018681600689888
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,128,16,0,0.024796800315380098
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,4,128,64,0,0.018695999681949616
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,128,32,0,0.018681600689888
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,256,1,0,0.04583199918270111
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,256,2,0,0.029576000571250916
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,256,4,0,0.024751999974250795
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,256,8,0,0.0208064004778862
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,256,16,0,0.022745600342750548
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,256,32,0,0.01870719939470291
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,256,16,0,0.022860799729824067
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,256,1,0,0.05011839866638183
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,4,256,64,0,0.018812799453735353
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,256,2,0,0.03296000063419342
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,256,4,0,0.027004799246788024
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,256,8,0,0.024753600358963013
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,256,32,0,0.020980800688266753
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,4,256,64,0,0.022856000065803527
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,512,1,0,0.09280319809913636
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,512,4,0,0.034760001301765445
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,512,8,0,0.030427199602127076
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,512,2,0,0.05536800026893616
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,512,16,0,0.024900799989700316
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,512,32,0,0.024771200120449068
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,4,512,64,0,0.022755199670791627
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,512,1,0,0.09667519927024841
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,512,2,0,0.058284801244735715
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,512,4,0,0.0375328004360199
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,1024,2,0,0.11152160167694092
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,512,16,0,0.028942400217056276
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,512,32,0,0.02882719933986664
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,512,8,0,0.033032000064849854
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,4,512,64,0,0.026900801062583923
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,1024,1,0,0.19285759925842286
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,1024,4,0,0.06928319931030273
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,1024,8,0,0.0434688001871109
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,1024,16,0,0.03914879858493805
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,1024,32,0,0.035478401184082034
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,4,1024,64,0,0.034200000762939456
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,1024,2,0,0.1157647967338562
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,1024,4,0,0.07180799841880799
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,1024,8,0,0.04835039973258972
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,1024,16,0,0.041257598996162416
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,1024,1,0,0.19672800302505494
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,1024,32,0,0.03907040059566498
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,4,1024,64,0,0.039068800210952756
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,1536,1,0,0.3185744047164917
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,1536,2,0,0.17870559692382812
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,1536,4,0,0.10682719945907593
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,1536,8,0,0.0698751986026764
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,1536,16,0,0.05139679908752441
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,4,1536,64,0,0.044964799284935
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,1536,32,0,0.04731520116329193
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,1536,2,0,0.18333760499954224
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,1536,4,0,0.11233919858932495
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,1536,8,0,0.07419840097427369
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,1536,1,0,0.3245599985122681
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,1536,32,0,0.05145440101623535
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,4,1536,64,0,0.04730879962444305
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,1536,16,0,0.055593597888946536
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,2048,2,0,0.25771520137786863
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,2048,4,0,0.15056480169296266
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,2048,8,0,0.096697598695755
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,2048,1,0,0.47689118385314944
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,2048,2,0,0.2620399951934814
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,2048,32,0,0.05758399963378906
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,4,2048,64,0,0.05430560111999512
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,2048,16,0,0.06251999735832214
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,2048,4,0,0.15468480587005615
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,2048,1,0,0.4813839912414551
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,2048,8,0,0.10101120471954346
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,2048,16,0,0.06805920004844665
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,2048,32,0,0.06133120059967041
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,4,2048,64,0,0.05761439800262451
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,3072,4,0,0.25566079616546633
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,3072,8,0,0.15519839525222778
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,3072,16,0,0.10484639406204224
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,3072,1,0,0.8773440361022949
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,3072,32,0,0.07986879944801331
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,3072,8,0,0.16190240383148194
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,3072,2,0,0.4602719783782959
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,4,3072,64,0,0.07392479777336121
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,3072,4,0,0.26111679077148436
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,3072,2,0,0.46839199066162107
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,3072,16,0,0.11131680011749268
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,3072,32,0,0.08471680283546448
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,3072,1,0,0.8801967620849609
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,4,3072,64,0,0.07982720136642456
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,4096,8,0,0.22885439395904542
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,4096,16,0,0.15123360157012938
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,4096,4,0,0.3918015956878662
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,4096,32,0,0.10226240158081054
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,4096,2,0,0.7200736045837403
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,4,4096,64,0,0.0984063982963562
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,4096,1,0,1.3926560401916503
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,4096,8,0,0.23459200859069823
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,4096,4,0,0.3966847896575928
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,4096,16,0,0.15607839822769165
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,4096,32,0,0.10733920335769653
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,4096,2,0,0.7331615924835205
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,4,4096,64,0,0.10268800258636475
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,4096,1,0,1.3975407600402832
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,6144,8,0,0.4141632080078125
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,6144,16,0,0.2512768030166626
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,6144,4,0,0.745681619644165
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,6144,32,0,0.17286080121994019
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,6144,2,0,1.425723171234131
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,4,6144,64,0,0.15150879621505736
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,6144,16,0,0.2901760101318359
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,6144,32,0,0.1846351981163025
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,6144,8,0,0.42207999229431153
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,6144,4,0,0.7538047790527344
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,4,6144,64,0,0.1609328031539917
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,6144,1,0,2.7894880294799806
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,6144,2,0,1.4147135734558105
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,8192,16,0,0.4244368076324463
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,8192,8,0,0.6757855892181397
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,8192,32,0,0.28066239356994627
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,8192,4,0,1.22119197845459
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,4,8192,64,0,0.22181758880615235
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,6144,1,0,2.785960006713867
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,8192,2,0,2.324900817871094
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,8192,16,0,0.4761551856994629
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,8192,8,0,0.7612383842468262
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,8192,4,0,1.290827178955078
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,4,8192,64,0,0.21572799682617189
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,8192,32,0,0.3274751901626587
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,8192,2,0,2.3454639434814455
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,10240,8,0,1.0657360076904296
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,8192,1,0,4.8911376953125
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,10240,16,0,0.6425312042236329
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,10240,4,0,1.8118175506591796
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,10240,32,0,0.36789119243621826
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,4,10240,64,0,0.299073600769043
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,8192,1,0,4.773345565795898
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,10240,16,0,0.6813168048858642
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,10240,8,0,0.9906975746154785
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,4,10240,64,0,0.30808320045471194
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,10240,2,0,3.649687957763672
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,10240,32,0,0.4574863910675049
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,10240,4,0,1.8400960922241212
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,12288,16,0,0.8166111946105957
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,12288,8,0,1.5497535705566405
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,10240,2,0,3.66656494140625
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,12288,32,0,0.4975168228149414
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,4,12288,64,0,0.3943471908569336
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,12288,4,0,2.548139190673828
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,10240,1,0,7.423379516601562
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,12288,16,0,0.8554719924926758
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,12288,8,0,1.453774356842041
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,4,12288,64,0,0.3615679979324341
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,12288,32,0,0.5310991764068603
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,12288,2,0,5.189932632446289
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,10240,1,0,7.457023620605469
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,12288,4,0,2.517068862915039
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,16384,16,0,1.4121343612670898
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,12288,2,0,5.093281555175781
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,16384,8,0,2.5159536361694337
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,16384,32,0,0.7632175922393799
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,4,16384,64,0,0.5938576221466064
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,12288,1,0,10.360441589355469
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,16384,4,0,4.310667037963867
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,16384,16,0,1.426740837097168
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,16384,8,0,2.284662437438965
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,16384,32,0,0.8676896095275879
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,12288,1,0,10.67234115600586
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,16,1,0,0.03365280032157898
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,16384,4,0,4.352926254272461
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,4,16384,64,0,0.5922880172729492
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,16,2,0,0.024825599789619446
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,16,4,0,0.018592000007629395
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,16,8,0,0.016684800386428833
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,16,16,0,0.01671680063009262
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,16,32,0,0.016596800088882445
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,8,16,64,0,0.012544000148773193
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,16,1,0,0.03534240126609802
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,16,2,0,0.02492479979991913
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,16,4,0,0.02481919974088669
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,16384,2,0,9.066248321533203
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,16,8,0,0.016654400527477263
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,16,16,0,0.0187376007437706
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,32,2,0,0.020721599459648132
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,8,16,64,0,0.01973759979009628
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,32,1,0,0.041142401099205014
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,16,32,0,0.016641600430011748
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,32,4,0,0.020684799551963805
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,32,8,0,0.014567999541759491
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,32,16,0,0.016612799465656282
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,32,32,0,0.016598400473594666
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,8,32,64,0,0.014580799639225006
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,32,1,0,0.0350816011428833
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,32,2,0,0.024718399345874786
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,32,32,0,0.016651199758052827
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,32,4,0,0.018777599930763243
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,32,8,0,0.019721600413322448
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,32,16,0,0.01844000071287155
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,8,32,64,0,0.016521599888801575
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,64,1,0,0.037294399738311765
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,64,2,0,0.02481600046157837
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,64,4,0,0.016788800060749055
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,16384,2,0,9.040892791748046
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,64,8,0,0.016606399416923524
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,64,16,0,0.014548799395561219
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,64,32,0,0.014664000272750855
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,8,64,64,0,0.014609600603580474
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,64,1,0,0.052883201837539674
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,64,2,0,0.028911998867988585
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,64,4,0,0.020720000565052032
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,64,8,0,0.018824000656604768
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,64,16,0,0.01879359930753708
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,64,32,0,0.022678400576114654
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,8,64,64,0,0.016606399416923524
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,128,1,0,0.04888159930706024
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,128,2,0,0.030995199084281923
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,128,4,0,0.02288320064544678
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,128,8,0,0.019118399918079378
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,128,16,0,0.016737599670886994
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,128,32,0,0.014655999839305878
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,8,128,64,0,0.014646400511264802
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,128,1,0,0.0515936017036438
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,128,2,0,0.03552959859371185
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,128,4,0,0.024831999838352204
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,128,8,0,0.020796799659729005
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,128,16,0,0.018692800402641298
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,8,128,64,0,0.018615999817848207
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,256,2,0,0.0485071986913681
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,128,32,0,0.01881600022315979
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,256,1,0,0.08338879942893981
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,256,4,0,0.031089600920677186
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,256,8,0,0.024940800666809083
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,256,16,0,0.022711999714374542
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,8,256,64,0,0.020772799849510193
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,256,32,0,0.020844799280166627
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,256,1,0,0.08757920265197754
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,256,2,0,0.05146239995956421
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,256,4,0,0.034995201230049136
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,256,8,0,0.02927359938621521
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,16384,1,0,18.436770629882812
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,256,32,0,0.0247871994972229
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,8,256,64,0,0.024771200120449068
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,256,16,0,0.024905599653720856
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,512,4,0,0.058355200290679934
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,512,2,0,0.09473919868469238
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,512,1,0,0.16317600011825562
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,512,8,0,0.05143839716911316
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,512,16,0,0.033127999305725096
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,512,32,0,0.028940799832344054
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,8,512,64,0,0.02701280117034912
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,512,1,0,0.1685647964477539
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,512,2,0,0.09853280186653138
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,512,4,0,0.062089598178863524
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,512,8,0,0.05556480288505554
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,512,16,0,0.03715679943561554
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,1024,4,0,0.11672320365905761
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,8,512,64,0,0.03099200129508972
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,512,32,0,0.03213759958744049
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,16384,1,0,18.27397003173828
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,1024,1,0,0.3610559940338135
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,1024,2,0,0.19880640506744385
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,1024,1,0,0.3662703990936279
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,1024,8,0,0.07885760068893433
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,1024,16,0,0.04958719909191132
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,1024,32,0,0.04323840141296387
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,8,1024,64,0,0.04128159880638123
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,1024,4,0,0.12117919921875
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,1024,2,0,0.26966559886932373
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,1024,8,0,0.07848640084266663
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,1024,16,0,0.05370079874992371
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,1024,32,0,0.048588800430297854
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,8,1024,64,0,0.0441536009311676
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,8,1536,64,0,0.05453280210494995
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,1536,2,0,0.32871360778808595
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,1536,4,0,0.18609280586242677
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,1536,8,0,0.15290080308914183
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,1536,1,0,0.6186927795410156
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,1536,16,0,0.07820000052452088
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,1536,32,0,0.059864002466201785
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,1536,4,0,0.1912224054336548
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,1536,2,0,0.3350127935409546
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,1536,8,0,0.12124639749526978
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,1536,16,0,0.11021280288696289
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,1536,1,0,0.6254816055297852
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,1536,32,0,0.06510400176048278
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,8,1536,64,0,0.05964319705963135
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,2048,4,0,0.27079360485076903
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,2048,8,0,0.16270560026168823
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,2048,2,0,0.48752641677856445
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,2048,16,0,0.12382400035858154
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,2048,32,0,0.07592800259590149
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,2048,1,0,0.9324048042297364
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,8,2048,64,0,0.06969919800758362
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,2048,4,0,0.2744064092636108
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,2048,8,0,0.16809600591659546
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,2048,2,0,0.49362878799438475
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,2048,16,0,0.11531840562820435
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,2048,32,0,0.08065919876098633
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,2048,1,0,0.9372336387634277
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,3072,16,0,0.17204159498214722
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,8,2048,64,0,0.07402719855308533
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,3072,4,0,0.47739200592041015
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,3072,8,0,0.2750623941421509
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,3072,32,0,0.12236000299453735
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,3072,2,0,0.8923727989196777
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,8,3072,64,0,0.09622719883918762
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,3072,1,0,1.7514287948608398
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,3072,8,0,0.27993919849395754
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,3072,2,0,0.902228832244873
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,3072,4,0,0.48579998016357423
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,3072,16,0,0.1794319987297058
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,3072,32,0,0.12867679595947265
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,8,3072,64,0,0.10936479568481446
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,3072,1,0,1.7558559417724608
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,4096,16,0,0.2507263898849487
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,4096,8,0,0.4119872093200684
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,4096,32,0,0.18692959547042848
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,8,4096,64,0,0.13551679849624634
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,4096,4,0,0.7446832180023193
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,4096,2,0,1.414083194732666
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,4096,16,0,0.2605567932128906
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,4096,8,0,0.46224160194396974
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,4096,32,0,0.19251840114593505
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,8,4096,64,0,0.13147679567337037
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,4096,4,0,0.7660927772521973
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,4096,2,0,1.4186719894409179
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,4096,1,0,2.800596809387207
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,6144,16,0,0.4606304168701172
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,6144,8,0,0.7853024005889893
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,6144,32,0,0.3326512098312378
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,8,6144,64,0,0.21587519645690917
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,6144,4,0,1.4487664222717285
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,4096,1,0,2.814187240600586
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,6144,16,0,0.5309728145599365
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,6144,8,0,0.8309535980224609
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,6144,2,0,2.8439903259277344
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,6144,32,0,0.3511087894439697
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,8,6144,64,0,0.23872799873352052
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,6144,4,0,1.4725888252258301
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,6144,2,0,2.856390380859375
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,8192,16,0,0.7043968200683594
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,8192,8,0,1.2888704299926759
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,8,8192,64,0,0.3017600059509277
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,8192,32,0,0.48789119720458984
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,6144,1,0,5.8900398254394535
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,8192,4,0,2.4080127716064452
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,8192,16,0,0.8136896133422852
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,6144,1,0,5.840974426269531
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,8192,8,0,1.4897744178771972
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,8192,32,0,0.4933055877685547
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,8,8192,64,0,0.40055041313171386
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,8192,4,0,2.4292976379394533
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,8192,2,0,4.730428695678711
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,10240,16,0,1.2262255668640136
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,8192,2,0,4.92686882019043
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,10240,8,0,1.9608751296997071
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,10240,32,0,0.6449600219726562
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,8,10240,64,0,0.4832304000854492
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,10240,4,0,3.723222351074219
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,8192,1,0,9.876878356933593
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,10240,8,0,2.006524848937988
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,10240,16,0,1.1997471809387208
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,10240,4,0,3.6267887115478517
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,10240,32,0,0.6929999828338623
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,8,10240,64,0,0.49976320266723634
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,10240,2,0,7.375347137451172
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,8192,1,0,9.897102355957031
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,12288,16,0,1.533897590637207
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,10240,2,0,7.482231903076172
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,12288,32,0,0.8699456214904785
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,12288,8,0,2.8832815170288084
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,8,12288,64,0,0.668993616104126
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,12288,4,0,5.405500793457032
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,10240,1,0,15.188418579101562
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,12288,8,0,2.952672004699707
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,12288,4,0,5.186240005493164
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,12288,16,0,1.7099103927612305
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,12288,32,0,0.9815024375915528
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,10240,1,0,15.237913513183594
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,8,12288,64,0,0.6911312103271484
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,12288,2,0,10.872395324707032
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,16384,16,0,2.638528060913086
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,12288,2,0,10.848566436767578
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,16384,8,0,4.498728179931641
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,16384,32,0,1.4485792160034179
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,8,16384,64,0,1.0161295890808106
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,16384,4,0,9.347334289550782
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,12288,1,0,21.42303924560547
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,16384,16,0,2.626700782775879
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,12288,1,0,21.343020629882812
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,16384,4,0,9.139730834960938
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,16384,8,0,4.524998474121094
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,16,1,0,0.051451200246810914
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,16,2,0,0.033774399757385255
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,16,4,0,0.0227183997631073
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,16,8,0,0.01865600049495697
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,16,16,0,0.016612799465656282
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,16384,32,0,1.4649567604064941
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,16,32,0,0.016564799845218657
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,16,16,64,0,0.01656640022993088
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,8,16384,64,0,1.0605728149414062
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,16,1,0,0.053852802515029906
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,16,2,0,0.03715519905090332
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,16,4,0,0.026876801252365114
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,16,8,0,0.0247856006026268
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,16,16,0,0.02117439955472946
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,16,32,0,0.020712000131607056
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,16,16,64,0,0.01863040030002594
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,32,1,0,0.05838879942893982
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,32,2,0,0.03922399878501892
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,32,4,0,0.02075839936733246
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,16384,2,0,18.097514343261718
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,32,32,0,0.014582400023937226
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,32,8,0,0.015521599352359772
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,32,16,0,0.01653600037097931
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,32,1,0,0.04976640045642853
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,16,32,64,0,0.01669919937849045
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,32,2,0,0.033327999711036685
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,32,4,0,0.026752001047134398
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,32,8,0,0.020630399882793426
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,32,16,0,0.022780799865722658
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,32,32,0,0.016711999475955964
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,64,1,0,0.05949119925498962
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,64,2,0,0.04126720130443573
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,16,32,64,0,0.01671680063009262
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,64,4,0,0.02582240104675293
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,64,8,0,0.01868959963321686
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,64,16,0,0.016531200706958772
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,16,64,64,0,0.014668799936771393
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,64,1,0,0.06188639998435974
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,64,32,0,0.014608000218868256
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,64,2,0,0.04273920059204102
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,64,4,0,0.02895520031452179
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,16,64,64,0,0.018702399730682374
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,64,8,0,0.021065600216388702
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,64,16,0,0.02073120027780533
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,64,32,0,0.01868959963321686
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,128,1,0,0.0856544017791748
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,128,2,0,0.05177119970321655
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,128,4,0,0.03178240060806274
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,128,8,0,0.02337439954280853
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,128,16,0,0.019139200448989868
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,128,32,0,0.018667200207710268
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,16,128,64,0,0.016574400663375854
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,128,1,0,0.08924959897994995
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,128,2,0,0.05378400087356568
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,128,4,0,0.035894399881362914
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,128,8,0,0.026759999990463256
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,128,16,0,0.022646400332450866
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,128,32,0,0.02083200067281723
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,16384,2,0,18.363600158691405
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,16,128,64,0,0.020718400180339814
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,256,1,0,0.14706239700317383
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,256,2,0,0.08558080196380616
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,256,4,0,0.05122720003128052
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,256,8,0,0.0347680002450943
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,256,16,0,0.032892799377441405
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,256,32,0,0.024806399643421174
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,16,256,64,0,0.02288320064544678
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,256,1,0,0.15168639421463012
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,256,16,0,0.03096640110015869
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,256,4,0,0.05348640084266663
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,256,8,0,0.041335999965667725
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,256,2,0,0.08884159922599792
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,256,32,0,0.02898559868335724
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,16,256,64,0,0.02860960066318512
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,512,2,0,0.1703968048095703
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,512,1,0,0.3061568021774292
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,512,4,0,0.10295519828796387
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,512,8,0,0.06260480284690857
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,512,16,0,0.04340159893035889
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,512,32,0,0.03712159991264343
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,16,512,64,0,0.03513599932193756
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,512,1,0,0.31218719482421875
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,512,2,0,0.17681759595870972
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,512,4,0,0.10500320196151733
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,512,8,0,0.06939039826393127
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,512,16,0,0.04743359982967377
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,512,32,0,0.04257920086383819
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,16,512,64,0,0.03899999856948853
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,1024,4,0,0.2096992015838623
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,1024,2,0,0.3737760066986084
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,1024,8,0,0.1285423994064331
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,1024,16,0,0.0858735978603363
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,1024,1,0,0.7066959857940673
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,1024,32,0,0.06159999966621399
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,16,1024,64,0,0.055587202310562134
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,1024,4,0,0.21563999652862548
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,1024,16,0,0.09242720007896424
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,1024,2,0,0.3803423881530762
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,1024,8,0,0.13487839698791504
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,1024,1,0,0.7107295989990234
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,1024,32,0,0.06579040288925171
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,16,1024,64,0,0.06174399852752686
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,1536,8,0,0.20260000228881836
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,1536,16,0,0.13337440490722657
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,1536,4,0,0.34498560428619385
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,1536,32,0,0.09448000192642211
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,1536,2,0,0.6365183830261231
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,16,1536,64,0,0.07588319778442383
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,1536,8,0,0.21094880104064942
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,1536,16,0,0.13804800510406495
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,1536,1,0,1.2215456008911132
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,1536,32,0,0.1027008056640625
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,1536,2,0,0.6422160148620606
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,16,1536,64,0,0.08218399882316589
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,1536,4,0,0.3514256000518799
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,1536,1,0,1.2259712219238281
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,2048,8,0,0.2921247959136963
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,2048,16,0,0.1849776029586792
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,2048,4,0,0.5117824077606201
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,2048,32,0,0.13005599975585938
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,2048,2,0,0.9529616355895996
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,16,2048,64,0,0.09737439751625061
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,2048,8,0,0.30900800228118896
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,2048,1,0,1.8857536315917969
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,2048,4,0,0.5175136089324951
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,2048,2,0,0.9627231597900391
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,2048,16,0,0.1990831971168518
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,2048,32,0,0.15109119415283204
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,16,2048,64,0,0.10307199954986572
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,2048,1,0,1.8617279052734375
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,3072,16,0,0.3418447971343994
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,3072,8,0,0.5119743824005127
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,16384,1,0,36.576638793945314
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,3072,4,0,0.9644767761230468
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,3072,32,0,0.23024160861968995
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,16,3072,64,0,0.15720000267028808
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,3072,2,0,1.7780048370361328
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,3072,8,0,0.521342420578003
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,3072,16,0,0.3504751920700073
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,3072,32,0,0.21784799098968505
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,3072,4,0,0.9619839668273926
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,16,3072,64,0,0.1667088031768799
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,3072,2,0,1.8015439987182618
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,3072,1,0,3.596958541870117
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,4096,8,0,0.7886928081512451
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,4096,16,0,0.515611219406128
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,4096,32,0,0.34824481010437014
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,16,4096,64,0,0.22040801048278807
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,4096,4,0,1.4610560417175293
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,3072,1,0,3.9190624237060545
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,4096,8,0,0.8064784049987793
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,4096,2,0,2.8351919174194338
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,4096,32,0,0.3108479976654053
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,4096,4,0,1.4709487915039063
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,16,4096,64,0,0.2486639976501465
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,4096,16,0,0.524564790725708
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,16384,1,0,36.694741821289064
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,4096,2,0,2.9885087966918946
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,6144,16,0,1.0736271858215332
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,6144,32,0,0.604529619216919
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,6144,8,0,1.5932623863220214
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,4096,1,0,5.712123107910156
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,16,6144,64,0,0.43447041511535645
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,6144,4,0,2.9302047729492187
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,4096,1,0,5.911089706420898
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,6144,16,0,0.9522640228271484
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,6144,8,0,1.5965264320373536
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,6144,32,0,0.6117728233337403
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,16,6144,64,0,0.4210368156433105
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,6144,4,0,2.9654016494750977
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,6144,2,0,5.840676879882812
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,8192,16,0,1.55174560546875
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,6144,2,0,5.962964630126953
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,8192,8,0,2.620681571960449
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,8192,32,0,0.8494560241699218
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,16,8192,64,0,0.637934398651123
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,8192,4,0,4.8953296661376955
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,6144,1,0,11.98846435546875
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,8192,8,0,2.8761600494384765
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,8192,16,0,1.6332992553710937
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,6144,1,0,12.096939086914062
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,8192,4,0,4.8791648864746096
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,16,8192,64,0,0.6321392059326172
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,8192,32,0,0.9888671875
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,32,16,1,0,0.08845279812812805
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,32,16,4,0,0.03253119885921478
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,32,16,8,0,0.024963200092315674
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,32,16,16,0,0.019099199771881105
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,32,16,32,0,0.0165120005607605
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,32,16,64,0,0.0166143998503685
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,8192,2,0,10.250812530517578
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,32,16,2,0,0.05332319736480713
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,32,16,1,0,0.09595680236816406
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,32,16,4,0,0.037567999958992
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,32,16,8,0,0.02887200117111206
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,32,16,16,0,0.022728000581264497
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,32,16,2,0,0.04910880029201507
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,32,16,32,0,0.02266719937324524
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,32,16,64,0,0.020684799551963805
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,32,32,1,0,0.10249279737472534
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,32,32,2,0,0.052404797077178954
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,32,32,4,0,0.03504480123519897
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,32,32,8,0,0.026822400093078614
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,32,32,16,0,0.016551999747753142
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,32,32,32,0,0.01879040002822876
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,32,32,64,0,0.015876799821853638
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,32,32,1,0,0.08611360192298889
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,32,32,2,0,0.066839998960495
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,32,32,4,0,0.034995201230049136
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,32,32,8,0,0.028479999303817748
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,32,32,16,0,0.020465600490570068
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,32,32,32,0,0.018588800728321076
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,32,32,64,0,0.02327840030193329
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,32,64,1,0,0.1077728033065796
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,32,64,2,0,0.061622399091720584
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,32,64,4,0,0.039087998867034915
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,32,64,32,0,0.01733600050210953
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,32,64,8,0,0.030910399556159974
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,8192,2,0,10.135915374755859
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,32,64,1,0,0.11235200166702271
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,32,64,16,0,0.020443199574947356
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,32,64,64,0,0.016728000342845918
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,32,64,2,0,0.06575520038604736
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,32,64,8,0,0.03088639974594116
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,32,64,4,0,0.05412639975547791
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,32,64,16,0,0.022804799675941467
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,32,64,32,0,0.020735999941825865
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,32,64,64,0,0.020844799280166627
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,32,128,1,0,0.15742239952087403
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,32,128,2,0,0.11149120330810547
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,32,128,4,0,0.053495997190475465
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,32,128,8,0,0.0355536013841629
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,32,128,16,0,0.026787200570106508
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,32,128,32,0,0.022625599801540375
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,32,128,64,0,0.022777600586414336
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,32,128,2,0,0.09255200028419494
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,32,128,1,0,0.16094080209732056
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,32,128,4,0,0.05634080171585083
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,32,128,8,0,0.03795199990272522
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,32,128,16,0,0.033055999875068666
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,32,128,32,0,0.025982400774955748
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,32,128,64,0,0.02351839989423752
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,32,256,1,0,0.27687199115753175
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,32,256,2,0,0.15217440128326415
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,32,256,4,0,0.09329599738121033
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,32,256,8,0,0.05747039914131165
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,32,256,16,0,0.0392767995595932
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,32,256,32,0,0.03506079912185669
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,32,256,64,0,0.03091520071029663
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,32,256,4,0,0.09528800249099731
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,32,256,2,0,0.15719679594039918
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,32,256,1,0,0.2840879917144775
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,32,256,32,0,0.03909119963645935
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,32,256,8,0,0.06156960129737854
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,32,256,16,0,0.04357120096683502
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,32,256,64,0,0.03506399989128113
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,8192,1,0,20.160264587402345
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,32,512,2,0,0.3168224096298218
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,32,512,4,0,0.18119200468063354
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,32,512,8,0,0.11103359460830689
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,32,512,1,0,0.5903840065002441
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,32,512,16,0,0.07639039754867553
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,32,512,64,0,0.04945439994335175
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,32,512,2,0,0.3216016054153442
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,32,512,32,0,0.068094402551651
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,32,512,4,0,0.18709919452667237
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,32,512,1,0,0.5964784145355224
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,32,512,8,0,0.11817920207977295
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,32,512,32,0,0.059543997049331665
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,32,512,64,0,0.055460798740386966
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,32,512,16,0,0.0809391975402832
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,32,1024,8,0,0.23352320194244386
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,32,1024,4,0,0.39447999000549316
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,32,1024,16,0,0.15109920501708984
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,32,1024,32,0,0.10832799673080444
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,32,1024,2,0,0.727511978149414
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,32,1024,64,0,0.08218719959259033
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,32,1024,8,0,0.23921759128570558
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,32,1024,16,0,0.15852160453796388
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,32,1024,4,0,0.404201602935791
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,32,1024,1,0,1.6096752166748047
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,32,1024,32,0,0.11546239852905274
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,32,1024,64,0,0.09057760238647461
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,8192,1,0,19.816143798828126
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,32,1024,2,0,0.7333151817321777
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,32,1024,1,0,1.403486442565918
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,32,1536,16,0,0.23732640743255615
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,32,1536,8,0,0.3801520109176636
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,32,1536,64,0,0.12983200550079346
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,32,1536,4,0,0.6778927803039551
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,32,1536,32,0,0.201094388961792
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,32,1536,2,0,1.2514528274536132
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,32,1536,8,0,0.3900079965591431
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,32,1536,16,0,0.2526911973953247
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,32,1536,32,0,0.20780160427093505
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,32,1536,4,0,0.6832863807678222
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,32,1536,64,0,0.14559680223464966
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,32,1536,2,0,1.259830379486084
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,32,1536,1,0,2.470332717895508
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,32,2048,32,0,0.2299328088760376
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,32,2048,16,0,0.35157599449157717
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,32,2048,8,0,0.5558095932006836
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,32,2048,64,0,0.19577759504318237
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,32,2048,4,0,1.015662384033203
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,32,1536,1,0,2.519001579284668
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,32,2048,2,0,1.9226463317871094
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,32,2048,16,0,0.3547935962677002
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,32,2048,32,0,0.27838399410247805
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,32,2048,8,0,0.6304255962371826
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,32,2048,64,0,0.20692799091339112
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,32,2048,4,0,1.0191760063171387
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,32,2048,2,0,1.914779281616211
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,32,2048,1,0,3.7786209106445314
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,32,3072,16,0,0.6174863815307617
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,32,3072,8,0,1.0027024269104003
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,32,3072,32,0,0.4283599853515625
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,32,3072,64,0,0.2897520065307617
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,32,3072,4,0,1.8584991455078126
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,32,2048,1,0,3.762384033203125
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,32,3072,16,0,0.6746287822723389
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,32,3072,8,0,1.0369296073913574
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,32,3072,2,0,3.6660640716552733
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,32,3072,32,0,0.44720959663391113
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,32,3072,64,0,0.30910239219665525
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,32,3072,4,0,1.8655471801757812
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,32,3072,2,0,3.6247280120849608
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,32,4096,8,0,1.594279956817627
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,32,4096,16,0,0.880884838104248
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,32,4096,32,0,0.6286431789398194
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,32,4096,64,0,0.396833610534668
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,32,3072,1,0,7.3161155700683596
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,32,4096,4,0,2.941307258605957
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,32,4096,16,0,1.001094436645508
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,32,4096,8,0,1.7786624908447266
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,32,3072,1,0,7.4400177001953125
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,32,4096,32,0,0.6777359962463378
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,64,16,1,0,0.15249439477920532
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,32,4096,64,0,0.44317917823791503
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,64,16,2,0,0.08360159993171692
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,64,16,4,0,0.051801598072052
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,32,4096,4,0,2.9924175262451174
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,64,16,8,0,0.031065601110458373
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,64,16,16,0,0.024672000110149382
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,64,16,32,0,0.018662400543689728
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,64,16,64,0,0.016595199704170227
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,64,16,1,0,0.13623039722442626
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,64,16,2,0,0.0751695990562439
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,64,16,4,0,0.0565775990486145
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,64,16,8,0,0.03705120086669922
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,32,4096,2,0,5.936812973022461
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,64,16,16,0,0.02292799949645996
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,64,16,32,0,0.01943040043115616
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,64,16,64,0,0.02279839962720871
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,64,32,1,0,0.17651360034942626
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,64,32,2,0,0.1002511978149414
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,64,32,4,0,0.0496288001537323
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,64,32,8,0,0.03299199938774109
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,64,32,16,0,0.026894399523735048
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,64,32,32,0,0.020606400072574617
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,64,32,64,0,0.018993599712848662
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,64,32,1,0,0.15531200170516968
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,64,32,2,0,0.08648800253868102
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,32,4096,2,0,5.918084716796875
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,64,32,4,0,0.058387202024459836
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,64,32,8,0,0.03640320003032684
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,64,32,16,0,0.027003198862075806
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,64,32,32,0,0.020815999805927278
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,64,64,8,0,0.04564320147037506
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,64,32,64,0,0.020524799823760986
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,64,64,2,0,0.11270560026168823
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,64,64,1,0,0.24129118919372558
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,64,64,4,0,0.06413599848747253
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,64,64,32,0,0.023020799458026885
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,64,64,16,0,0.02889919877052307
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,64,64,64,0,0.020612800121307374
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,64,64,1,0,0.20394399166107177
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,64,64,2,0,0.13895360231399537
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,64,64,4,0,0.0667087972164154
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,64,64,8,0,0.04729439914226532
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,64,64,16,0,0.033103999495506284
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,64,64,64,0,0.022974400222301482
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,64,64,32,0,0.024827200174331664
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,64,128,2,0,0.16161439418792725
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,32,4096,1,0,11.9831298828125
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,64,128,1,0,0.37341599464416503
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,64,128,4,0,0.09374079704284669
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,64,128,8,0,0.05964959859848022
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,64,128,16,0,0.041252800822258
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,64,128,32,0,0.03179199993610382
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,64,128,64,0,0.02701599895954132
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,64,128,2,0,0.16686880588531494
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,64,128,4,0,0.09795039892196655
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,64,128,8,0,0.0637935996055603
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,64,128,1,0,0.33396799564361573
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,64,128,16,0,0.04538719952106476
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,64,256,2,0,0.2877104043960571
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,64,128,32,0,0.03561919927597046
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,64,128,64,0,0.031167998909950256
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,64,256,32,0,0.05145599842071533
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,64,256,4,0,0.16390880346298217
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,64,256,1,0,0.5360335826873779
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,64,256,8,0,0.10777599811553955
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,64,256,16,0,0.06989439725875854
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,32,4096,1,0,11.898833465576171
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,64,256,64,0,0.04527359902858734
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,64,256,2,0,0.29404640197753906
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,64,256,4,0,0.16979199647903442
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,64,256,1,0,0.5416192054748535
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,64,256,8,0,0.10759040117263793
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,64,256,16,0,0.07428159713745117
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,64,256,32,0,0.05560160279273987
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,64,256,64,0,0.05336959958076477
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,64,512,8,0,0.2024463891983032
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,64,512,4,0,0.3386575937271118
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,64,512,16,0,0.13337600231170654
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,64,512,2,0,0.611033582687378
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,64,512,32,0,0.09824000000953674
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,64,512,64,0,0.07805439829826355
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,64,512,1,0,1.1658479690551757
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,64,512,8,0,0.2112351894378662
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,64,512,4,0,0.3480272054672241
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,64,512,2,0,0.6220608234405518
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,64,512,16,0,0.14159200191497803
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,64,512,32,0,0.10467519760131835
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,64,512,64,0,0.08218240141868591
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,64,512,1,0,1.1741439819335937
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,64,1024,16,0,0.27684481143951417
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,64,1024,8,0,0.4399263858795166
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,64,1024,32,0,0.1948591947555542
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,64,1024,64,0,0.1520640015602112
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,64,1024,4,0,0.7697296142578125
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,64,1024,2,0,1.4379119873046875
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,64,1024,8,0,0.4528063774108887
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,64,1024,16,0,0.2908047914505005
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,64,1024,32,0,0.20611999034881592
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,64,1024,4,0,0.7849279880523682
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,64,1024,64,0,0.16412160396575928
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,64,1024,2,0,1.4570624351501464
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,64,1024,1,0,2.8001327514648438
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,64,1536,32,0,0.3222815990447998
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,64,1536,8,0,0.7341504096984863
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,64,1536,16,0,0.4577487945556641
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,64,1024,1,0,2.841211128234863
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,64,1536,64,0,0.23703999519348146
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,64,1536,4,0,1.3196528434753418
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,64,1536,16,0,0.47159199714660643
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,64,1536,32,0,0.33702239990234373
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,64,1536,2,0,2.4963024139404295
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,64,1536,8,0,0.8131823539733887
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,64,1536,64,0,0.25507678985595705
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,64,1536,4,0,1.3462112426757813
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,64,2048,16,0,0.6491407871246337
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,64,1536,2,0,2.540216064453125
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,64,2048,8,0,1.0836480140686036
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,64,2048,32,0,0.42931361198425294
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,64,2048,64,0,0.3414367914199829
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,64,1536,1,0,5.0601951599121096
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,64,2048,4,0,1.9950624465942384
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,64,2048,16,0,0.6998847961425781
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,64,1536,1,0,5.073395156860352
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,64,2048,8,0,1.2051664352416993
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,64,2048,2,0,3.872991943359375
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,64,2048,32,0,0.46608638763427734
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,64,2048,64,0,0.38513760566711425
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,64,2048,4,0,2.0702207565307615
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,128,16,1,0,0.23808801174163818
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,128,16,2,0,0.14166079759597777
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,128,16,16,0,0.029475200176239013
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,128,16,4,0,0.07265759706497192
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,128,16,8,0,0.04923360049724579
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,128,16,32,0,0.020854400098323823
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,128,16,64,0,0.019908800721168518
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,128,16,2,0,0.1282047986984253
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,128,16,1,0,0.25538880825042726
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,128,16,4,0,0.0767520010471344
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,128,16,8,0,0.04729439914226532
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,128,16,16,0,0.033435198664665225
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,128,16,32,0,0.024692800641059876
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,128,16,64,0,0.021076799929142
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,64,2048,2,0,3.945670318603516
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,128,32,1,0,0.2893455982208252
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,128,32,2,0,0.15279680490493774
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,128,32,4,0,0.08703680038452148
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,128,32,8,0,0.05178400278091431
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,128,32,16,0,0.0368800014257431
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,128,32,32,0,0.027532801032066345
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,128,32,4,0,0.0881168007850647
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,128,32,64,0,0.020734399557113647
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,64,2048,1,0,7.777870178222656
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,128,32,1,0,0.28649919033050536
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,128,32,2,0,0.15859839916229249
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,128,32,8,0,0.055478399991989134
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,128,32,16,0,0.04118559956550598
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,128,64,2,0,0.20359361171722412
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,128,64,4,0,0.11701439619064331
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,128,32,32,0,0.028825598955154418
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,128,32,64,0,0.022864000499248506
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,128,64,8,0,0.06865440011024475
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,128,64,1,0,0.4493375778198242
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,128,64,16,0,0.0473471999168396
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,128,64,32,0,0.034934398531913755
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,128,64,64,0,0.026980799436569215
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,128,64,16,0,0.05143359899520874
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,64,2048,1,0,7.86495361328125
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,128,64,1,0,0.38194880485534666
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,128,64,2,0,0.21050560474395752
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,128,64,4,0,0.12159359455108643
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,128,64,8,0,0.07399200201034546
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,128,64,32,0,0.039217600226402284
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,128,64,64,0,0.031033599376678468
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,128,128,4,0,0.17466239929199218
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,128,128,1,0,0.5738687992095948
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,128,128,8,0,0.10631680488586426
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,128,128,2,0,0.35147359371185305
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,128,128,16,0,0.07092000246047973
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,128,128,32,0,0.052742397785186766
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,128,128,64,0,0.04324640035629272
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,128,128,2,0,0.3131727933883667
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,128,128,4,0,0.17971999645233155
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,128,128,1,0,0.5790192127227783
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,128,128,8,0,0.11484960317611695
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,128,128,16,0,0.07594559788703918
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,128,128,32,0,0.05748639702796936
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,128,128,64,0,0.04747200012207031
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,128,256,4,0,0.3096976041793823
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,128,256,16,0,0.12463040351867676
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,128,256,8,0,0.18649280071258545
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,128,256,2,0,0.5588031768798828
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,128,256,32,0,0.09037439823150635
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,128,256,1,0,1.0502927780151368
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,128,256,64,0,0.07221440076828003
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,128,256,8,0,0.1931488037109375
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,128,256,4,0,0.31919040679931643
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,128,256,2,0,0.5643424034118653
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,128,256,16,0,0.13213119506835938
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,128,256,32,0,0.09828320145606995
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,128,256,64,0,0.08017119765281677
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,128,256,1,0,1.0589823722839355
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,128,512,8,0,0.3823776006698608
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,128,512,16,0,0.24667201042175294
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,128,512,4,0,0.6571663856506348
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,128,512,32,0,0.17735040187835693
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,128,512,64,0,0.1410704016685486
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,128,512,2,0,1.2059503555297852
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,128,512,16,0,0.25986080169677733
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,128,512,8,0,0.3959104061126709
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,128,512,32,0,0.1888432025909424
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,128,512,4,0,0.6689248085021973
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,128,512,1,0,2.322171211242676
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,128,512,64,0,0.15383679866790773
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,128,512,2,0,1.2253104209899903
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,128,512,1,0,2.338921546936035
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,128,1024,32,0,0.36481759548187254
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,128,1024,16,0,0.5297935962677002
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,128,1024,8,0,0.8817456245422364
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,128,1024,64,0,0.28671040534973147
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,128,1024,4,0,1.5257295608520507
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,128,1024,16,0,0.5547088146209717
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,128,1024,32,0,0.3904848098754883
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,128,1024,8,0,0.8822303771972656
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,128,1024,2,0,2.889967918395996
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,128,1024,64,0,0.3072495937347412
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,256,16,1,0,0.4455008029937744
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,256,16,2,0,0.23355839252471924
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,256,16,8,0,0.0737775981426239
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,128,1024,4,0,1.5596351623535156
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,256,16,4,0,0.13670560121536254
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,256,16,16,0,0.04642240107059479
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,256,16,32,0,0.03307200074195862
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,256,16,64,0,0.023830400407314302
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,256,16,2,0,0.24522559642791747
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,128,1024,2,0,2.8973487854003905
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,256,16,4,0,0.13121919631958007
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,256,16,1,0,0.4570288181304932
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,256,16,8,0,0.07686880230903625
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,256,16,16,0,0.049296000599861146
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,256,16,32,0,0.03564639985561371
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,256,16,64,0,0.02984800040721893
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,128,1024,1,0,5.77428970336914
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,256,32,2,0,0.29084000587463377
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,256,32,4,0,0.15739359855651855
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,256,32,8,0,0.09085760116577149
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,256,32,1,0,0.5433328151702881
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,256,32,16,0,0.05759519934654236
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,256,32,32,0,0.0432559996843338
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,256,32,64,0,0.03508319854736328
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,256,32,2,0,0.2910095930099487
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,256,32,4,0,0.1630944013595581
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,256,32,64,0,0.03565120100975037
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,256,32,1,0,0.5507887840270996
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,256,32,8,0,0.09386559724807739
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,256,32,16,0,0.06171839833259583
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,256,64,8,0,0.12767679691314698
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,256,32,32,0,0.04540959894657135
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,128,1024,1,0,5.764092636108399
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,256,64,4,0,0.22099199295043945
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,256,64,2,0,0.3892287969589233
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,256,64,16,0,0.08033440113067628
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,256,64,1,0,0.7367616176605225
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,256,64,4,0,0.22167999744415284
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,256,64,32,0,0.05761920213699341
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,256,64,1,0,0.7426784038543701
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,256,64,64,0,0.045368000864982605
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,256,64,8,0,0.13200160264968872
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,256,64,16,0,0.08642560243606567
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,256,64,2,0,0.4522687911987305
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,256,64,32,0,0.06333439946174621
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,256,64,64,0,0.05220479965209961
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,256,128,4,0,0.32850720882415774
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,256,128,8,0,0.19516479969024658
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,256,128,1,0,1.1231535911560058
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,256,128,2,0,0.5901584148406982
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,256,128,16,0,0.12743200063705445
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,256,128,32,0,0.09516320228576661
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,256,128,2,0,0.6012991905212403
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,256,128,64,0,0.07390400171279907
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,256,128,4,0,0.336078405380249
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,256,128,8,0,0.20313758850097657
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,256,128,16,0,0.13528640270233155
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,256,128,32,0,0.10055040121078491
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,256,128,1,0,1.1330816268920898
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,256,128,64,0,0.08205599784851074
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,256,256,8,0,0.35236320495605467
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,256,256,16,0,0.22793760299682617
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,256,256,32,0,0.16761280298233033
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,256,256,4,0,0.6007936000823975
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,256,256,64,0,0.13335360288619996
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,256,256,2,0,1.0904784202575684
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,256,256,16,0,0.2405951976776123
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,256,256,8,0,0.36602559089660647
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,256,256,32,0,0.17993600368499757
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,256,256,4,0,0.6143360137939453
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,256,256,1,0,2.08239688873291
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,256,256,64,0,0.1463263988494873
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,256,256,2,0,1.1060640335083007
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,256,256,1,0,2.0897472381591795
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,256,512,32,0,0.3343039989471436
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,256,512,16,0,0.4721424102783203
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,256,512,8,0,0.7444863796234131
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,1,256,512,64,0,0.26306240558624266
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,256,512,4,0,1.2980815887451171
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,256,512,32,0,0.3567791938781738
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,256,512,2,0,2.401807975769043
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,256,512,8,0,0.7651616096496582
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,256,512,16,0,0.4938655853271484
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,1,16,1,0,0.016523200273513793
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,16,2,0,0.012756800651550293
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,16,4,0,0.012508800625801087
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,256,512,4,0,1.3212431907653808
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,1,256,512,64,0,0.2853568077087402
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,16,8,0,0.012595200538635254
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,16,16,0,0.01305440068244934
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,16,32,0,0.012651200592517852
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,16,64,0,0.012641599774360657
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,1,16,1,0,0.018673600256443025
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,16,16,0,0.016740800440311433
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,256,512,2,0,2.4319408416748045
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,16,64,0,0.016622400283813475
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,16,2,0,0.01655520051717758
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,16,4,0,0.016575999557971954
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,16,8,0,0.016616000235080718
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,256,512,1,0,4.689912033081055
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,16,32,0,0.016599999368190767
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,1,32,1,0,0.01621440052986145
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,32,2,0,0.014689600467681885
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,32,4,0,0.0133775994181633
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,32,8,0,0.014521600306034088
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,32,16,0,0.014478400349617004
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,32,8,0,0.016659200191497803
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,32,32,0,0.012620800733566284
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,32,64,0,0.012547199428081513
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,1,32,1,0,0.01867839992046356
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,256,512,1,0,4.74805908203125
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,1,64,1,0,0.018606400489807128
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,32,2,0,0.01822720021009445
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,32,4,0,0.01738079935312271
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,64,16,0,0.012548799812793731
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,32,16,0,0.016627199947834015
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,32,32,0,0.014710399508476257
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,32,64,0,0.016571199893951415
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,64,2,0,0.01650079935789108
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,64,8,0,0.014670400321483612
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,64,4,0,0.014679999649524688
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,64,32,0,0.012558400630950928
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,64,64,0,0.014636799693107605
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,1,64,1,0,0.020659199357032774
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,64,2,0,0.018615999817848207
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,64,4,0,0.01664000004529953
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,64,8,0,0.016684800386428833
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,64,16,0,0.017558400332927705
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,64,32,0,0.01669279932975769
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,128,32,0,0.014473600685596466
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,64,64,0,0.01674720048904419
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,1,128,1,0,0.020798400044441223
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,128,2,0,0.01674560010433197
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,128,4,0,0.016523200273513793
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,128,8,0,0.01448799967765808
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,128,16,0,0.014478400349617004
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,128,32,0,0.016515199840068818
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,128,64,0,0.014683200418949128
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,1,128,1,0,0.022767999768257143
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,128,2,0,0.020751999318599702
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,128,4,0,0.018695999681949616
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,128,8,0,0.01860159933567047
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,128,16,0,0.017310400307178498
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,128,64,0,0.016633599996566772
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,1,256,1,0,0.029046401381492615
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,256,2,0,0.023537600040435792
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,256,4,0,0.020852799713611602
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,256,8,0,0.020216000080108643
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,256,16,0,0.01857600063085556
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,256,32,0,0.01865600049495697
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,256,64,0,0.01668799966573715
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,1,256,1,0,0.03301919996738434
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,256,2,0,0.027505600452423097
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,256,4,0,0.02288320064544678
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,256,8,0,0.022779199481010436
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,256,16,0,0.02274720072746277
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,256,32,0,0.02072480022907257
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,512,16,0,0.022835199534893037
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,256,64,0,0.020718400180339814
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,512,32,0,0.02266400009393692
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,1,512,1,0,0.05362719893455505
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,512,2,0,0.033073601126670835
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,512,4,0,0.026902401447296144
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,512,8,0,0.024961599707603456
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,512,64,0,0.020790399610996248
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,1,512,1,0,0.05565440058708191
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,512,2,0,0.03706560134887695
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,512,4,0,0.030953601002693176
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,512,8,0,0.02675839960575104
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,512,16,0,0.026872000098228453
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,512,32,0,0.024831999838352204
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,512,64,0,0.024937599897384644
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,1,1024,1,0,0.10660480260848999
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,1024,2,0,0.06477760076522827
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,1024,4,0,0.04109599888324737
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,1024,8,0,0.035067200660705566
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,1024,16,0,0.03157599866390228
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,1024,32,0,0.029335999488830568
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,1024,64,0,0.02895039916038513
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,1,1024,1,0,0.11135679483413696
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,1024,2,0,0.0664192020893097
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,1024,4,0,0.044200000166893
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,1024,8,0,0.03793280124664307
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,1024,16,0,0.035043200850486754
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,1024,32,0,0.032995200157165526
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,1024,64,0,0.032971200346946714
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,1,1536,1,0,0.1718672037124634
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,1536,2,0,0.09998400211334228
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,1536,4,0,0.06369439959526062
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,1536,8,0,0.04505760073661804
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,1536,16,0,0.03916000127792359
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,1536,8,0,0.04737119972705841
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,1536,32,0,0.03727039992809296
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,1536,64,0,0.037088000774383546
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,1,1536,1,0,0.17508959770202637
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,1536,2,0,0.10510400533676148
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,1536,4,0,0.0671280026435852
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,1536,16,0,0.04316479861736298
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,1536,32,0,0.040188801288604734
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,1536,64,0,0.03916960060596466
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,1,2048,1,0,0.249289608001709
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,2048,2,0,0.14130879640579225
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,2048,4,0,0.08818879723548889
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,2048,8,0,0.05555840134620667
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,2048,16,0,0.04939039945602417
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,2048,32,0,0.04532960057258606
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,2048,64,0,0.04526079893112182
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,1,2048,1,0,0.2546256065368652
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,2048,2,0,0.14636160135269166
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,2048,8,0,0.05802239775657654
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,2048,16,0,0.05230879783630371
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,3072,4,0,0.14159200191497803
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,2048,4,0,0.09084320068359375
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,2048,32,0,0.04956960082054138
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,2048,64,0,0.04766719937324524
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,3072,2,0,0.2447551965713501
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,1,3072,1,0,0.44901437759399415
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,3072,8,0,0.0920960009098053
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,3072,2,0,0.2479680061340332
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,3072,16,0,0.06671839952468872
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,3072,16,0,0.07033759951591492
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,3072,32,0,0.061720001697540286
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,3072,64,0,0.05961920022964477
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,3072,4,0,0.15004320144653321
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,3072,8,0,0.09671679735183716
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,1,3072,1,0,0.45216641426086424
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,3072,32,0,0.065801602602005
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,3072,64,0,0.06367040276527405
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,4096,4,0,0.21202559471130372
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,4096,2,0,0.37528960704803466
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,4096,8,0,0.13528640270233155
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,1,4096,1,0,0.7052576065063476
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,4096,16,0,0.0845583975315094
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,4096,32,0,0.07877600193023682
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,4096,16,0,0.08897759914398193
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,4096,64,0,0.07397119998931885
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,4096,64,0,0.0779919981956482
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,4096,2,0,0.377673602104187
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,4096,4,0,0.21599359512329103
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,4096,8,0,0.1383407950401306
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,1,4096,1,0,0.7097792148590087
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,4096,32,0,0.08216639757156372
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,6144,8,0,0.2248447895050049
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,6144,4,0,0.3885056018829346
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,6144,16,0,0.14653760194778442
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,6144,32,0,0.11099040508270264
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,6144,2,0,0.7196352005004882
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,6144,64,0,0.10544639825820923
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,6144,16,0,0.1527024030685425
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,1,6144,1,0,1.397475242614746
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,6144,4,0,0.39359359741210936
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,6144,8,0,0.23064959049224854
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,6144,2,0,0.7250207901000977
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,6144,32,0,0.11367039680480957
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,6144,64,0,0.1089967966079712
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,1,6144,1,0,1.3996288299560546
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,8192,8,0,0.36242239475250243
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,8192,16,0,0.21942079067230225
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,8192,32,0,0.14426399469375611
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,8192,4,0,0.6311088085174561
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,8192,64,0,0.14401439428329468
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,8192,2,0,1.1840784072875976
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,8192,8,0,0.3569664001464844
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,8192,16,0,0.23157761096954346
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,8192,4,0,0.6314847946166993
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,8192,32,0,0.17606240510940552
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,8192,64,0,0.1409119963645935
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,1,8192,1,0,2.3186960220336914
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,8192,2,0,1.182636833190918
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,10240,32,0,0.20944321155548096
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,10240,16,0,0.34409120082855227
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,10240,8,0,0.5075568199157715
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,10240,64,0,0.18343520164489746
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,10240,4,0,0.9905088424682618
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,1,8192,1,0,2.3223312377929686
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,10240,2,0,1.7788511276245118
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,10240,8,0,0.5327040195465088
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,10240,16,0,0.3829967975616455
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,10240,32,0,0.23681919574737548
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,10240,4,0,0.933078384399414
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,10240,64,0,0.1778656005859375
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,10240,2,0,1.7763280868530273
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,1,10240,1,0,3.4430736541748046
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,12288,16,0,0.46019840240478516
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,12288,8,0,0.6878560066223145
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,12288,32,0,0.2543776035308838
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,12288,64,0,0.2553567886352539
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,12288,4,0,1.2640912055969238
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,1,10240,1,0,3.638963317871094
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,12288,16,0,0.41640000343322753
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,12288,8,0,0.8416031837463379
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,12288,2,0,2.4834447860717774
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,12288,32,0,0.31228160858154297
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,12288,64,0,0.2221343994140625
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,12288,4,0,1.2751615524291993
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,12288,2,0,2.5332271575927736
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,1,16384,16,0,0.7177904129028321
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,1,16384,8,0,1.141038417816162
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,1,16384,32,0,0.47438721656799315
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,1,16384,64,0,0.2824160099029541
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,1,12288,1,0,5.088127899169922
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,1,16384,4,0,2.1495071411132813
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,1,16384,16,0,0.6622576236724853
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,1,12288,1,0,5.0069633483886715
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,1,16384,8,0,1.3490927696228028
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,1,16384,32,0,0.43116321563720705
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,2,16,1,0,0.022732800245285033
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,1,16384,64,0,0.3526927947998047
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,1,16384,4,0,2.1454208374023436
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,16,2,0,0.016631999611854555
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,16,4,0,0.01653439998626709
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,16,8,0,0.01669439971446991
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,16,16,0,0.016620799899101257
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,1,16384,2,0,4.646492767333984
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,16,32,0,0.014617599546909332
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,16,64,0,0.013801600039005279
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,2,16,1,0,0.02786720097064972
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,16,8,0,0.016739200055599212
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,16,2,0,0.020683200657367708
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,16,4,0,0.020633600652217865
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,16,16,0,0.017399999499320983
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,16,32,0,0.020001600682735442
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,16,64,0,0.01663679927587509
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,2,32,1,0,0.022856000065803527
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,32,16,0,0.014593599736690522
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,32,2,0,0.01866399943828583
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,32,64,0,0.014628799259662628
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,1,16384,2,0,4.359692764282227
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,32,4,0,0.014569599926471711
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,32,8,0,0.01669439971446991
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,32,32,0,0.012617599964141846
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,2,32,1,0,0.02693440020084381
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,32,2,0,0.01876160055398941
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,32,4,0,0.02067359983921051
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,32,8,0,0.017980800569057466
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,32,16,0,0.018700799345970152
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,32,32,0,0.016568000614643096
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,32,64,0,0.01659359931945801
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,2,64,1,0,0.02494560033082962
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,64,2,0,0.0206496000289917
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,1,16384,1,0,8.899066925048828
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,64,4,0,0.0147024005651474
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,64,8,0,0.014620800316333771
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,64,16,0,0.014612799882888794
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,64,32,0,0.014478400349617004
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,64,64,0,0.014604799449443817
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,2,64,1,0,0.033004799485206605
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,64,2,0,0.024825599789619446
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,64,4,0,0.018639999628067016
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,64,8,0,0.016846400499343873
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,64,16,0,0.01653759926557541
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,64,32,0,0.017360000312328337
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,64,64,0,0.01672479957342148
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,1,16384,1,0,8.795345306396484
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,2,128,1,0,0.031009599566459656
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,128,2,0,0.020712000131607056
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,128,4,0,0.018592000007629395
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,128,8,0,0.015123200416564942
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,128,16,0,0.014713600277900696
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,128,32,0,0.014584000408649444
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,128,64,0,0.01658080071210861
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,2,128,1,0,0.03303839862346649
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,128,2,0,0.024172799289226533
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,128,4,0,0.020695999264717102
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,128,8,0,0.018705600500106813
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,128,16,0,0.016599999368190767
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,128,32,0,0.01860959976911545
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,128,64,0,0.0208064004778862
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,2,256,1,0,0.04528000056743622
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,256,2,0,0.02885279953479767
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,256,4,0,0.02456959933042526
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,256,8,0,0.02082560062408447
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,256,16,0,0.01863359957933426
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,256,32,0,0.018751999735832213
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,256,64,0,0.01865919977426529
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,2,256,1,0,0.048758399486541745
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,256,2,0,0.033025598526000975
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,256,4,0,0.026766398549079896
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,256,8,0,0.02481919974088669
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,256,16,0,0.022742399573326112
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,256,32,0,0.022785599529743194
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,256,64,0,0.02077440023422241
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,2,512,1,0,0.090692800283432
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,512,2,0,0.053660798072814944
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,512,4,0,0.03299840092658997
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,512,8,0,0.027063998579978942
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,512,16,0,0.024742400646209715
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,512,64,0,0.02268960028886795
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,512,32,0,0.022734400629997254
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,2,512,1,0,0.09409599900245666
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,512,2,0,0.056783998012542726
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,512,4,0,0.03558399975299835
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,512,8,0,0.03126240074634552
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,512,16,0,0.027001601457595826
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,512,64,0,0.02675360143184662
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,512,32,0,0.026836800575256347
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,2,1024,1,0,0.1885823965072632
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,1024,2,0,0.10916800498962402
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,1024,4,0,0.06548640131950378
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,1024,2,0,0.11185120344161988
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,1024,8,0,0.04197919964790344
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,1024,8,0,0.04533439874649048
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,1024,16,0,0.037136000394821164
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,1024,32,0,0.03291999995708465
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,1024,64,0,0.030888000130653383
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,2,1024,1,0,0.19580639600753785
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,2,1536,1,0,0.31663360595703127
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,1024,4,0,0.06985120177268982
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,1024,16,0,0.04005599915981293
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,1024,32,0,0.03712800145149231
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,1024,64,0,0.03506399989128113
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,1536,4,0,0.10323679447174072
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,1536,8,0,0.0669327974319458
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,1536,2,0,0.17783039808273315
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,1536,2,0,0.17443840503692626
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,1536,16,0,0.047302401065826415
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,1536,32,0,0.04317440092563629
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,1536,64,0,0.03920960128307342
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,1536,4,0,0.10690079927444458
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,1536,8,0,0.06980320215225219
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,1536,16,0,0.04996480047702789
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,2,1536,1,0,0.3205391883850098
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,1536,32,0,0.045326399803161624
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,1536,64,0,0.04325119853019714
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,2048,2,0,0.25105440616607666
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,2,2048,1,0,0.4692575931549072
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,2048,4,0,0.14603040218353272
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,2048,8,0,0.09064639806747436
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,2048,16,0,0.05767040252685547
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,2048,32,0,0.05144799947738647
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,2048,64,0,0.049348801374435425
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,2048,4,0,0.14897279739379882
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,2048,2,0,0.2570159912109375
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,2,2048,1,0,0.4775407791137695
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,2048,8,0,0.09454399943351746
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,2048,16,0,0.061667197942733766
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,2048,32,0,0.05637279748916626
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,2048,64,0,0.053179198503494264
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,3072,4,0,0.24776160717010498
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,3072,2,0,0.45194082260131835
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,3072,16,0,0.09652000069618225
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,3072,8,0,0.14656959772109984
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,3072,32,0,0.07197759747505188
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,2,3072,1,0,0.8709728240966796
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,3072,64,0,0.0669040024280548
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,3072,2,0,0.4565631866455078
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,3072,4,0,0.2511807918548584
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,3072,8,0,0.15206559896469116
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,3072,16,0,0.09998400211334228
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,2,3072,1,0,0.8732687950134277
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,3072,32,0,0.07612959742546081
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,3072,64,0,0.07060959935188293
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,4096,16,0,0.13942879438400269
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,4096,4,0,0.3778543949127197
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,4096,64,0,0.08452640175819397
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,4096,2,0,0.710427188873291
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,4096,8,0,0.21772480010986328
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,4096,32,0,0.09089440107345581
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,2,4096,1,0,1.3836496353149415
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,4096,16,0,0.1428272008895874
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,4096,8,0,0.22069120407104492
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,4096,4,0,0.3838223934173584
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,4096,32,0,0.09471840262413025
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,4096,2,0,0.7147679805755616
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,2,4096,1,0,1.3851903915405273
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,4096,64,0,0.08829759955406188
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,6144,32,0,0.15705920457839967
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,6144,8,0,0.3989408016204834
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,6144,16,0,0.2361056089401245
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,6144,64,0,0.12350080013275147
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,6144,4,0,0.7303008079528809
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,6144,2,0,1.399619197845459
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,6144,32,0,0.1603808045387268
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,6144,16,0,0.24019041061401367
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,6144,8,0,0.4052175998687744
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,6144,64,0,0.1254271984100342
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,6144,4,0,0.7337376117706299
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,6144,2,0,1.40524320602417
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,2,6144,1,0,2.797520065307617
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,8192,16,0,0.3627327919006348
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,8192,32,0,0.23456161022186278
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,8192,64,0,0.19455519914627076
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,8192,8,0,0.6576352119445801
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,8192,4,0,1.1954607963562012
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,2,6144,1,0,2.7518592834472657
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,8192,16,0,0.3677295923233032
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,8192,32,0,0.29012959003448485
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,8192,2,0,2.3719503402709963
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,8192,64,0,0.16117119789123535
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,8192,8,0,0.6508912086486817
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,8192,4,0,1.2860848426818847
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,10240,16,0,0.5849775791168212
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,8192,2,0,2.376163291931152
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,10240,8,0,0.9331999778747558
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,10240,32,0,0.37109439373016356
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,10240,64,0,0.25493440628051756
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,2,8192,1,0,4.732502365112305
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,10240,4,0,1.7717967987060548
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,10240,16,0,0.549121618270874
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,2,8192,1,0,4.629332733154297
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,10240,8,0,1.1162256240844726
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,10240,32,0,0.33944799900054934
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,10240,2,0,3.648945617675781
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,10240,64,0,0.27296640872955324
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,10240,4,0,1.8129167556762695
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,12288,16,0,0.8054880142211914
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,10240,2,0,3.581679916381836
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,12288,8,0,1.4422016143798828
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,12288,32,0,0.4475376129150391
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,12288,64,0,0.352838397026062
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,12288,4,0,2.698384094238281
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,2,10240,1,0,7.349371337890625
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,12288,8,0,1.4454095840454102
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,12288,16,0,0.8640480041503906
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,12288,2,0,4.972027206420899
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,2,10240,1,0,7.411759948730468
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,12288,32,0,0.5126815795898437
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,12288,64,0,0.3239919900894165
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,12288,4,0,2.8176895141601563
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,2,16384,16,0,1.2926544189453124
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,12288,2,0,5.137276840209961
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,2,16384,8,0,2.164295959472656
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,2,16384,32,0,0.751417589187622
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,2,16384,64,0,0.5377056121826171
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,2,16384,4,0,4.199566268920899
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,2,12288,1,0,10.42754898071289
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,2,16384,16,0,1.4411184310913085
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,2,16384,8,0,2.532200050354004
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,2,12288,1,0,10.608599853515624
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,2,16384,32,0,0.7138224124908448
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,2,16384,4,0,4.349631881713867
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,4,16,1,0,0.034108799695968625
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,16,2,0,0.020703999698162077
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,16,4,0,0.019047999382019044
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,16,8,0,0.017444799840450286
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,2,16384,64,0,0.5394896030426025
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,16,16,0,0.014484800398349762
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,16,32,0,0.01653279960155487
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,16,64,0,0.01656000018119812
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,16,4,0,0.022737599909305573
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,4,16,1,0,0.036955198645591734
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,16,2,0,0.02268799990415573
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,2,16384,2,0,9.010433959960938
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,16,8,0,0.02296479940414429
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,16,16,0,0.020796799659729005
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,16,32,0,0.02072319984436035
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,16,64,0,0.01735839992761612
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,4,32,1,0,0.037427198886871335
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,32,2,0,0.027928000688552855
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,32,4,0,0.019390399754047393
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,32,8,0,0.016649599373340606
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,32,16,0,0.014606399834156037
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,32,32,0,0.01860480010509491
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,32,64,0,0.014558400213718414
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,4,32,1,0,0.034964799880981445
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,32,2,0,0.0227183997631073
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,32,16,0,0.016702400147914888
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,32,4,0,0.0186831995844841
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,32,8,0,0.01868640035390854
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,32,32,0,0.016545599699020384
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,32,64,0,0.016631999611854555
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,2,16384,2,0,9.050978851318359
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,4,64,1,0,0.03662239909172058
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,64,2,0,0.028854399919509888
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,64,32,0,0.014609600603580474
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,64,64,0,0.014460800588130951
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,64,4,0,0.018492799997329713
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,64,8,0,0.016523200273513793
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,64,4,0,0.02065120041370392
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,64,16,0,0.016715200245380403
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,4,64,1,0,0.04026080071926117
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,64,2,0,0.029091200232505797
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,64,8,0,0.01868959963321686
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,64,16,0,0.016873599588871004
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,64,32,0,0.022419199347496033
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,64,64,0,0.01661919951438904
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,4,128,1,0,0.04816479980945587
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,128,2,0,0.030836799740791322
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,128,4,0,0.022679999470710754
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,128,8,0,0.020686399936676026
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,128,16,0,0.016596800088882445
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,128,32,0,0.016548800468444824
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,128,64,0,0.014484800398349762
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,4,128,1,0,0.05267999768257141
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,128,2,0,0.03516480028629303
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,128,4,0,0.026948800683021544
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,128,8,0,0.02152799963951111
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,128,16,0,0.020611199736595153
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,128,32,0,0.018595199286937713
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,256,8,0,0.024803200364112855
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,128,64,0,0.01865600049495697
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,4,256,1,0,0.08117920160293579
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,256,2,0,0.04611839950084686
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,256,4,0,0.030905601382255555
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,256,16,0,0.020744000375270844
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,256,32,0,0.01868640035390854
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,256,64,0,0.018580800294876097
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,4,256,1,0,0.08558080196380616
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,256,2,0,0.05034080147743225
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,256,4,0,0.03311040103435516
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,2,16384,1,0,18.267213439941408
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,256,8,0,0.026899200677871705
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,256,16,0,0.02324800044298172
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,256,32,0,0.022675199806690215
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,512,16,0,0.02889440059661865
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,256,64,0,0.02271520048379898
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,512,32,0,0.024915200471878052
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,4,512,1,0,0.1606816053390503
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,512,2,0,0.09083840250968933
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,512,8,0,0.03508639931678772
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,512,4,0,0.07224159836769103
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,512,64,0,0.02489279955625534
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,512,2,0,0.09602879881858825
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,4,512,1,0,0.16461440324783325
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,512,4,0,0.05767040252685547
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,512,8,0,0.037364798784255984
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,512,16,0,0.03296799957752228
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,512,32,0,0.032974401116371156
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,512,64,0,0.028825598955154418
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,2,16384,1,0,18.674092102050782
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,4,1024,1,0,0.35670719146728513
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,1024,2,0,0.19162559509277344
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,1024,4,0,0.11099519729614257
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,1024,8,0,0.06873760223388672
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,1024,16,0,0.04517279863357544
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,1024,32,0,0.041652798652648926
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,1024,64,0,0.035150399804115294
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,1024,2,0,0.19768480062484742
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,1024,4,0,0.11542719602584839
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,4,1024,1,0,0.39935998916625975
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,1024,8,0,0.0725055992603302
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,1024,16,0,0.04735519886016846
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,1024,32,0,0.04126879870891571
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,1024,64,0,0.04189600050449371
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,1536,2,0,0.32031519412994386
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,1536,4,0,0.1779263973236084
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,4,1536,1,0,0.6081439971923828
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,1536,8,0,0.11099840402603149
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,1536,16,0,0.07024800181388854
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,1536,32,0,0.05149120092391968
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,1536,16,0,0.0739471971988678
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,1536,64,0,0.0453792005777359
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,1536,2,0,0.3262336015701294
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,1536,4,0,0.1824671983718872
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,4,1536,1,0,0.6155407905578614
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,1536,8,0,0.11214079856872558
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,1536,32,0,0.055542397499084475
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,2048,16,0,0.09485759735107421
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,1536,64,0,0.0514303982257843
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,2048,4,0,0.2572175979614258
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,2048,8,0,0.15107840299606323
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,2048,2,0,0.47723679542541503
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,2048,32,0,0.06243680119514465
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,4,2048,1,0,0.9239151954650879
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,2048,64,0,0.057608002424240114
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,2048,4,0,0.2636096000671387
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,2048,8,0,0.15783679485321045
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,2048,2,0,0.4811391830444336
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,2048,16,0,0.09950879812240601
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,2048,32,0,0.06751360297203064
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,4,2048,1,0,0.9266528129577637
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,2048,64,0,0.061705601215362546
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,3072,4,0,0.460916805267334
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,3072,8,0,0.25544641017913816
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,3072,16,0,0.15648479461669923
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,3072,32,0,0.10456479787826538
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,3072,2,0,0.8788047790527344
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,3072,64,0,0.08048639893531799
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,4,3072,1,0,1.7197887420654296
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,3072,8,0,0.26080000400543213
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,3072,16,0,0.1714192032814026
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,3072,4,0,0.4668288230895996
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,3072,32,0,0.10965440273284913
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,3072,64,0,0.09556639790534974
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,3072,2,0,0.8810447692871094
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,4096,16,0,0.2387471914291382
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,4096,8,0,0.39221758842468263
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,4,3072,1,0,1.7247615814208985
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,4096,32,0,0.15394560098648072
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,4096,4,0,0.7210671901702881
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,4096,64,0,0.10779520273208618
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,4096,2,0,1.409164810180664
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,4096,16,0,0.2398927927017212
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,4096,8,0,0.41090879440307615
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,4096,32,0,0.17738239765167235
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,4096,4,0,0.7283599853515625
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,4096,64,0,0.11085599660873413
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,4096,2,0,1.4492079734802246
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,4,4096,1,0,2.8063072204589843
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,6144,16,0,0.417907190322876
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,6144,64,0,0.20937120914459229
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,6144,32,0,0.2649951934814453
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,6144,8,0,0.7473120212554931
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,4,4096,1,0,2.8679264068603514
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,6144,4,0,1.4081232070922851
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,6144,8,0,0.7549295902252198
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,6144,32,0,0.2962192058563232
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,6144,16,0,0.4874735832214355
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,6144,64,0,0.18076640367507935
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,6144,4,0,1.4254480361938477
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,6144,2,0,2.7799039840698243
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,8192,16,0,0.6958735942840576
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,6144,2,0,2.8023456573486327
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,8192,8,0,1.2389679908752442
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,8192,32,0,0.42244319915771483
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,8192,64,0,0.3178159952163696
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,8192,4,0,2.3559328079223634
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,4,6144,1,0,5.69447021484375
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,8192,16,0,0.8208928108215332
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,8192,8,0,1.3381279945373534
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,4,6144,1,0,5.960396957397461
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,8192,4,0,2.37423038482666
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,8192,64,0,0.3272464036941528
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,8192,32,0,0.5017199993133545
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,8192,2,0,4.824137496948242
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,10240,16,0,1.1583984375
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,8192,2,0,4.817416000366211
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,10240,8,0,1.8864736557006836
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,10240,32,0,0.6608208179473877
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,10240,64,0,0.45339360237121584
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,10240,4,0,3.6923568725585936
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,4,8192,1,0,9.865502166748048
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,10240,8,0,2.0327072143554688
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,10240,16,0,1.127899169921875
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,4,8192,1,0,9.79931869506836
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,10240,4,0,3.547280120849609
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,10240,32,0,0.6287360191345215
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,10240,64,0,0.4012911796569824
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,10240,2,0,7.515555572509766
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,12288,16,0,1.5788240432739258
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,12288,8,0,2.505665588378906
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,10240,2,0,7.5496673583984375
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,12288,32,0,0.7957551956176758
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,12288,64,0,0.5646912097930908
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,12288,4,0,5.071542358398437
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,4,10240,1,0,15.051712036132812
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,12288,8,0,2.8691551208496096
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,12288,4,0,5.158943939208984
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,12288,16,0,1.5153552055358888
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,12288,32,0,0.8835519790649414
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,12288,64,0,0.5697599887847901
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,12288,2,0,10.9824462890625
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,4,10240,1,0,15.165969848632812
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,4,16384,16,0,2.5563888549804688
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,12288,2,0,10.690825653076171
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,4,16384,32,0,1.4189855575561523
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,4,16384,64,0,0.9028240203857422
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,4,16384,8,0,4.761912155151367
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,4,16384,4,0,8.972927856445313
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,4,12288,1,0,21.257203674316408
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,4,12288,1,0,21.640043640136717
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,4,16384,16,0,2.24136962890625
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,4,16384,64,0,0.8128671646118164
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,4,16384,8,0,4.821676635742188
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,8,16,1,0,0.04938879907131195
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,16,2,0,0.035006400942802426
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,16,4,0,0.02276639938354492
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,4,16384,32,0,1.480735969543457
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,16,8,0,0.01886080056428909
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,16,16,0,0.01664319932460785
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,16,32,0,0.01560640037059784
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,16,64,0,0.01650400012731552
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,8,16,1,0,0.049481600522994995
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,16,2,0,0.032393598556518556
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,16,4,0,0.027014398574829103
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,4,16384,2,0,18.308680725097656
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,16,8,0,0.02064639925956726
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,16,16,0,0.018972800672054292
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,16,32,0,0.020641599595546723
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,4,16384,4,0,9.184336090087891
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,16,64,0,0.02080000042915344
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,8,32,1,0,0.053502398729324344
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,32,2,0,0.032334399223327634
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,32,4,0,0.026740801334381104
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,32,8,0,0.020982399582862854
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,32,16,0,0.018787199258804323
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,32,32,0,0.014542399346828461
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,32,64,0,0.012516799569129943
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,8,32,1,0,0.06387199759483338
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,32,2,0,0.0372512012720108
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,32,4,0,0.024742400646209715
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,32,8,0,0.018662400543689728
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,32,16,0,0.01865919977426529
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,32,32,0,0.01863040030002594
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,32,64,0,0.018572799861431122
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,8,64,1,0,0.06014400124549866
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,64,2,0,0.0370608001947403
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,64,4,0,0.024799999594688416
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,64,8,0,0.016681599617004394
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,64,16,0,0.01660960018634796
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,64,32,0,0.014796799421310425
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,64,64,0,0.01465120017528534
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,8,64,1,0,0.061959999799728396
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,64,2,0,0.041201600432395936
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,64,4,0,0.027215999364852906
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,64,64,0,0.016654400527477263
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,64,8,0,0.020744000375270844
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,64,16,0,0.01871040016412735
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,64,32,0,0.016680000722408293
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,8,128,1,0,0.0844048023223877
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,128,2,0,0.04896480143070221
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,4,16384,2,0,18.244473266601563
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,128,64,0,0.014564800262451171
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,128,4,0,0.031011199951171874
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,128,8,0,0.020751999318599702
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,128,16,0,0.016760000586509706
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,128,32,0,0.01666239947080612
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,128,2,0,0.053198397159576416
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,8,128,1,0,0.10824320316314698
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,128,64,0,0.018648000061511995
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,128,4,0,0.033073601126670835
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,128,8,0,0.024823999404907225
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,256,2,0,0.0838479995727539
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,128,16,0,0.020828799903392793
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,128,32,0,0.02072319984436035
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,8,256,1,0,0.15712159872055054
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,256,4,0,0.04734239876270294
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,256,8,0,0.03116639852523804
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,256,16,0,0.024743999540805816
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,256,32,0,0.022784000635147093
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,256,64,0,0.02231840044260025
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,8,256,1,0,0.16200319528579712
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,256,2,0,0.08814719915390015
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,256,4,0,0.05146239995956421
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,256,8,0,0.033108800649642944
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,256,16,0,0.029075199365615846
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,256,32,0,0.026089599728584288
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,512,4,0,0.09493280053138733
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,256,64,0,0.02667680084705353
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,8,512,1,0,0.3001487970352173
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,512,64,0,0.02902719974517822
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,512,2,0,0.1643231987953186
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,512,8,0,0.058324801921844485
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,512,16,0,0.037084800004959104
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,512,32,0,0.03287839889526367
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,512,2,0,0.1685871958732605
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,8,512,1,0,0.30429279804229736
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,512,4,0,0.09864640235900879
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,512,16,0,0.04068480134010315
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,512,32,0,0.035148799419403076
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,512,64,0,0.033011201024055484
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,512,8,0,0.062115198373794554
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,1024,4,0,0.19881759881973265
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,1024,8,0,0.11763039827346802
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,1024,2,0,0.3635296106338501
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,1024,16,0,0.0741536021232605
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,8,1024,1,0,0.6938176155090332
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,1024,32,0,0.049619200825691226
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,1024,64,0,0.045238399505615236
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,1024,2,0,0.36816959381103515
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,1024,4,0,0.20269920825958251
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,1024,8,0,0.12239359617233277
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,8,1024,1,0,0.6986288070678711
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,1024,16,0,0.07819839715957641
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,1024,32,0,0.05459200143814087
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,1024,64,0,0.0485727995634079
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,1536,4,0,0.32848639488220216
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,1536,8,0,0.18797919750213624
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,1536,2,0,0.6186592102050781
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,1536,16,0,0.11537439823150634
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,8,1536,1,0,1.204742431640625
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,1536,32,0,0.0792144000530243
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,1536,64,0,0.05974559783935547
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,1536,4,0,0.33241920471191405
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,1536,8,0,0.19128799438476562
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,1536,16,0,0.12160799503326417
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,1536,2,0,0.6222655773162842
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,1536,32,0,0.08513919711112976
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,1536,64,0,0.06374239921569824
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,8,1536,1,0,1.2093024253845215
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,2048,8,0,0.26815359592437743
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,2048,16,0,0.17110879421234132
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,2048,4,0,0.49745922088623046
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,2048,32,0,0.10829919576644897
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,2048,2,0,0.9336864471435546
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,2048,64,0,0.07737280130386352
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,4,16384,1,0,36.73269958496094
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,8,2048,1,0,1.8428352355957032
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,2048,8,0,0.2799839973449707
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,2048,4,0,0.4937488079071045
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,2048,2,0,0.936348819732666
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,2048,64,0,0.08198080062866211
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,2048,32,0,0.1270848035812378
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,2048,16,0,0.22623839378356933
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,8,2048,1,0,1.85089111328125
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,3072,16,0,0.36990559101104736
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,3072,8,0,0.5199888229370118
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,3072,4,0,0.8956607818603516
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,3072,32,0,0.17970240116119385
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,3072,64,0,0.1667215943336487
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,3072,8,0,0.4864783763885498
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,3072,2,0,1.7345232009887694
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,3072,16,0,0.3764271974563599
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,3072,32,0,0.18071999549865722
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,3072,4,0,0.9216591835021972
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,3072,64,0,0.14871200323104858
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,3072,2,0,1.7783327102661133
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,8,3072,1,0,3.454652786254883
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,4096,8,0,0.7900032043457031
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,4096,16,0,0.43384318351745604
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,4096,32,0,0.3029599905014038
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,4096,64,0,0.1853487968444824
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,4096,4,0,1.4204976081848144
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,8,3072,1,0,3.454604721069336
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,4,16384,1,0,36.71126708984375
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,4096,8,0,0.7653744220733643
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,4096,16,0,0.470249605178833
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,4096,2,0,2.7990911483764647
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,4096,64,0,0.18740639686584473
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,4096,32,0,0.336411190032959
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,4096,4,0,1.4241888046264648
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,6144,16,0,0.8303071975708007
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,4096,2,0,2.9312400817871094
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,6144,8,0,1.6310335159301759
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,6144,32,0,0.4938608169555664
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,6144,64,0,0.33861598968505857
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,8,4096,1,0,5.646078491210938
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,6144,4,0,2.817198371887207
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,6144,16,0,0.9423600196838379
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,8,4096,1,0,5.833812713623047
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,6144,8,0,1.6702735900878907
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,6144,32,0,0.5585311889648438
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,6144,64,0,0.35768001079559325
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,6144,4,0,2.847934341430664
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,6144,2,0,5.902643203735352
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,8192,16,0,1.4543423652648926
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,6144,2,0,5.79466552734375
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,8192,8,0,2.57775993347168
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,8192,32,0,0.7945536136627197
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,8192,64,0,0.5370223999023438
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,8192,4,0,4.886006546020508
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,8,6144,1,0,11.857459259033202
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,8192,8,0,2.7180255889892577
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,8192,16,0,1.5670495986938477
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,8192,64,0,0.524729585647583
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,8192,32,0,0.8166288375854492
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,8,6144,1,0,12.111078643798828
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,8192,4,0,5.093609619140625
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,8192,2,0,10.039644622802735
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,10240,16,0,2.176388740539551
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,10240,8,0,3.7459438323974608
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,8192,2,0,10.130636596679688
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,10240,32,0,1.235689640045166
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,10240,64,0,0.7252448081970215
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,10240,4,0,7.775012969970703
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,8,8192,1,0,20.255825805664063
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,10240,8,0,3.9132736206054686
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,10240,16,0,2.0307392120361327
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,10240,4,0,7.7877052307128904
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,10240,32,0,1.1644975662231445
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,10240,64,0,0.7742368221282959
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,8,8192,1,0,20.195297241210938
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,10240,2,0,15.301748657226563
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,10240,2,0,15.415103149414062
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,12288,8,0,5.565667343139649
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,12288,32,0,1.6187616348266602
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,12288,16,0,2.5806720733642576
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,12288,64,0,1.0150272369384765
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,12288,4,0,10.711398315429687
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,8,10240,1,0,30.95935974121094
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,12288,8,0,5.452304077148438
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,12288,32,0,1.6555120468139648
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,12288,4,0,10.715414428710938
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,12288,16,0,3.0602272033691404
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,12288,64,0,1.0032048225402832
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,12288,2,0,21.340493774414064
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,8,10240,1,0,30.834835815429688
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,8,16384,32,0,2.710103988647461
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,8,16384,64,0,1.3962464332580566
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,8,16384,16,0,4.707974243164062
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,12288,2,0,21.303172302246093
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,8,16384,8,0,9.09298095703125
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,8,16384,16,0,4.890910339355469
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,8,16384,32,0,2.668462371826172
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,8,16384,4,0,18.418385314941407
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,16,16,1,0,0.0902239978313446
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,8,16384,8,0,9.346920013427734
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,8,16384,64,0,1.4476960182189942
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,16,2,0,0.05464159846305847
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,16,4,0,0.034995201230049136
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,16,8,0,0.022787199914455415
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,16,16,0,0.020667199790477753
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,16,32,0,0.01698399931192398
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,16,64,0,0.01658719927072525
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,16,16,1,0,0.09557120203971863
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,16,2,0,0.05780959725379944
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,16,4,0,0.03248479962348938
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,16,8,0,0.024723200500011443
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,16,16,0,0.024745599925518037
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,16,64,0,0.018648000061511995
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,16,32,0,0.017825600504875184
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,32,2,0,0.05286880135536194
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,16,32,1,0,0.1053920030593872
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,32,4,0,0.033046400547027587
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,32,8,0,0.022942399978637694
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,32,16,0,0.01842239946126938
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,32,32,0,0.015880000591278077
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,32,64,0,0.014563199877738953
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,16,32,1,0,0.09418240189552307
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,32,4,0,0.03938399851322174
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,32,2,0,0.05325919985771179
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,32,8,0,0.027118399739265442
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,32,16,0,0.020632000267505647
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,32,32,0,0.018673600256443025
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,32,64,0,0.0186256006360054
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,16,64,1,0,0.1103600025177002
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,64,2,0,0.0606112003326416
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,64,4,0,0.03720319867134094
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,64,16,0,0.018772800266742707
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,64,8,0,0.02691200077533722
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,64,32,0,0.01653279960155487
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,64,64,0,0.016599999368190767
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,16,64,1,0,0.11503360271453858
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,64,2,0,0.06344000101089478
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,8,12288,1,0,44.18258056640625
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,64,4,0,0.03978239893913269
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,64,8,0,0.028803199529647827
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,64,16,0,0.022694399952888487
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,64,64,0,0.01748960018157959
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,16,128,1,0,0.15644960403442382
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,64,32,0,0.0254256010055542
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,128,2,0,0.08518239855766296
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,128,4,0,0.050254398584365846
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,128,8,0,0.03292160034179688
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,128,16,0,0.028942400217056276
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,128,32,0,0.018607999384403228
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,128,64,0,0.018681600689888
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,16,128,1,0,0.1591215968132019
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,128,2,0,0.09855999946594238
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,128,4,0,0.05357120037078857
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,8,16384,4,0,18.252626037597658
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,128,8,0,0.03668160140514374
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,128,16,0,0.0268640011548996
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,128,32,0,0.024822400510311128
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,128,64,0,0.021188800036907197
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,256,4,0,0.08607199788093567
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,256,2,0,0.1475600004196167
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,256,8,0,0.05347040295600891
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,16,256,1,0,0.3691472053527832
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,256,16,0,0.03304960131645203
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,256,32,0,0.028833600878715514
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,256,64,0,0.02484000027179718
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,16,256,1,0,0.282590389251709
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,256,2,0,0.20516960620880126
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,256,16,0,0.037529599666595456
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,256,4,0,0.09029920101165771
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,256,8,0,0.05532159805297852
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,256,32,0,0.033024001121521
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,256,64,0,0.028984001278877257
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,16,512,1,0,0.5797967910766602
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,512,2,0,0.3053584098815918
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,512,4,0,0.16921919584274292
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,512,8,0,0.10296319723129273
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,512,16,0,0.06325759887695312
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,512,32,0,0.04328159987926483
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,512,64,0,0.03758879899978638
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,512,4,0,0.17396639585494994
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,512,8,0,0.10526080131530761
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,512,2,0,0.3095263957977295
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,512,16,0,0.06840159893035888
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,16,512,1,0,0.5990335941314697
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,512,32,0,0.047391998767852786
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,512,64,0,0.042027199268341066
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,1024,8,0,0.2094111919403076
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,1024,4,0,0.3724031925201416
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,1024,2,0,0.7154287815093994
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,1024,16,0,0.1277791976928711
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,1024,32,0,0.08570719957351684
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,1024,64,0,0.06175680160522461
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,16,1024,1,0,1.3750736236572265
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,8,12288,1,0,44.405209350585935
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,1024,4,0,0.3811743974685669
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,1024,8,0,0.22746880054473878
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,1024,2,0,0.7105296134948731
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,1024,32,0,0.09065120220184326
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,1024,64,0,0.06986399888992309
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,1024,16,0,0.17234879732131958
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,16,1024,1,0,1.3789055824279786
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,1536,16,0,0.2654880046844482
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,1536,32,0,0.1317792057991028
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,1536,8,0,0.35341439247131345
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,1536,4,0,0.6370704174041748
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,1536,64,0,0.12459360361099243
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,1536,2,0,1.2214063644409179
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,1536,8,0,0.39258880615234376
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,1536,16,0,0.214355206489563
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,1536,4,0,0.6450272083282471
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,1536,32,0,0.1462000012397766
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,1536,2,0,1.2331567764282227
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,16,1536,1,0,2.4055759429931642
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,1536,64,0,0.10308480262756348
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,2048,16,0,0.29331679344177247
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,2048,8,0,0.5687615871429443
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,8,16384,2,0,36.69037780761719
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,16,1536,1,0,2.431625556945801
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,2048,32,0,0.18547359704971314
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,2048,4,0,0.9623519897460937
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,2048,64,0,0.1389616012573242
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,2048,2,0,1.8674768447875976
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,2048,32,0,0.195689594745636
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,2048,16,0,0.31767840385437013
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,2048,8,0,0.5445199966430664
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,2048,64,0,0.15227680206298827
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,2048,4,0,0.9622351646423339
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,2048,2,0,1.9785648345947267
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,16,2048,1,0,3.7522048950195312
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,3072,8,0,0.9334287643432617
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,3072,64,0,0.21494719982147217
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,3072,16,0,0.5339295864105225
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,3072,32,0,0.3552272081375122
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,3072,4,0,1.7942991256713867
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,3072,8,0,0.9659472465515136
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,16,2048,1,0,4.26298713684082
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,3072,2,0,3.557494354248047
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,3072,16,0,0.5630127906799316
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,3072,32,0,0.3916735887527466
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,3072,4,0,1.8800048828125
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,3072,64,0,0.26127679347991944
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,3072,2,0,3.6014430999755858
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,8,16384,2,0,36.86334228515625
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,4096,8,0,1.5489215850830078
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,4096,16,0,0.8335359573364258
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,4096,64,0,0.33035519123077395
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,4096,32,0,0.5910704135894775
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,16,3072,1,0,7.3940574645996096
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,4096,4,0,2.874715232849121
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,4096,16,0,0.8532015800476074
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,16,3072,1,0,7.342713928222656
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,4096,8,0,1.6445775985717774
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,4096,32,0,0.5015727996826171
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,4096,64,0,0.3745151996612549
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,4096,2,0,5.9563743591308596
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,4096,4,0,3.2259422302246095
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,6144,16,0,1.7048431396484376
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,4096,2,0,5.783553695678711
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,6144,32,0,0.9565216064453125
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,6144,8,0,3.1112943649291993
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,6144,64,0,0.6247583866119385
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,16,4096,1,0,12.079612731933594
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,6144,4,0,6.009955215454101
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,6144,16,0,1.7677568435668944
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,16,4096,1,0,12.216754913330078
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,6144,32,0,0.970257568359375
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,6144,8,0,3.369308853149414
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,6144,64,0,0.6504079818725585
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,6144,4,0,5.9565166473388675
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,6144,2,0,11.965614318847656
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,16,8192,16,0,2.6238191604614256
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,6144,2,0,11.992107391357422
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,16,8192,8,0,5.1227264404296875
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,16,8192,32,0,1.4861248016357422
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,16,8192,64,0,0.7939311981201171
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,16,8192,16,0,2.4968896865844727
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,16,8192,4,0,10.176747131347657
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,16,8192,32,0,1.6304271697998047
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,32,16,1,0,0.15176960229873657
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,16,8192,64,0,1.0055279731750488
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,32,16,2,0,0.07467359900474549
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,32,16,4,0,0.05379040241241455
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,32,16,8,0,0.02898240089416504
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,32,16,16,0,0.02479040026664734
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,16,8192,8,0,5.003472137451172
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,32,16,32,0,0.016521599888801575
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,32,16,64,0,0.018624000251293182
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,32,16,2,0,0.07398080229759216
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,32,16,4,0,0.056707197427749635
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,32,16,1,0,0.16048480272293092
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,32,16,8,0,0.031068798899650574
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,32,16,16,0,0.027065598964691163
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,32,16,32,0,0.022811199724674224
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,32,16,64,0,0.01865600049495697
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,32,32,1,0,0.18771040439605713
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,32,32,2,0,0.10722719430923462
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,32,32,4,0,0.04783360064029694
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,32,32,8,0,0.041223999857902524
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,32,32,16,0,0.02728480100631714
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,32,32,32,0,0.01661760061979294
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,32,32,64,0,0.016646400094032288
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,32,32,1,0,0.20061440467834474
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,32,32,2,0,0.08422399759292602
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,32,32,4,0,0.05296639800071716
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,32,32,8,0,0.045363199710845944
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,32,32,16,0,0.02399040013551712
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,32,32,64,0,0.024718399345874786
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,32,32,32,0,0.020788800716400147
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,16,6144,1,0,25.941900634765624
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,32,64,1,0,0.19606560468673706
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,32,64,2,0,0.14336800575256348
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,32,64,4,0,0.06421599984169006
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,32,64,16,0,0.026819199323654175
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,32,64,8,0,0.04875679910182953
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,32,64,32,0,0.024710400402545928
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,32,64,64,0,0.018742400407791137
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,32,64,1,0,0.199454402923584
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,32,64,2,0,0.14142240285873414
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,32,64,4,0,0.06711360216140747
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,32,64,8,0,0.04132800102233887
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,32,64,16,0,0.03089120090007782
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,32,64,32,0,0.028492799401283263
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,32,64,64,0,0.02075680047273636
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,32,128,1,0,0.29167358875274657
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,32,128,2,0,0.1575584053993225
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,16,8192,4,0,10.228256225585938
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,32,128,4,0,0.11177279949188232
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,32,128,8,0,0.05486720204353333
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,32,128,16,0,0.03516800105571747
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,32,128,32,0,0.02686080038547516
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,32,128,2,0,0.16200480461120606
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,32,128,1,0,0.2994159936904907
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,32,128,64,0,0.026943999528884887
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,32,128,4,0,0.09226880073547364
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,16,6144,1,0,25.1660888671875
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,32,128,64,0,0.028798401355743408
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,32,128,16,0,0.03920480012893677
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,32,128,8,0,0.05589280128479004
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,32,128,32,0,0.03089759945869446
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,32,256,1,0,0.5276815891265869
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,32,256,8,0,0.0925279974937439
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,16,8192,2,0,19.80566864013672
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,32,256,4,0,0.16582560539245605
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,32,256,16,0,0.055579197406768796
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,32,256,2,0,0.33979361057281493
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,32,256,32,0,0.03932960033416748
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,32,256,64,0,0.03549120128154755
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,32,256,16,0,0.06999999880790711
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,32,256,2,0,0.35982398986816405
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,32,256,1,0,0.5313392162322998
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,32,256,4,0,0.16243040561676025
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,32,256,8,0,0.09621120095252991
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,32,256,32,0,0.04319359958171844
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,32,256,64,0,0.03915359973907471
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,32,512,4,0,0.3170032024383545
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,32,512,8,0,0.18200639486312867
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,32,512,16,0,0.11132320165634155
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,32,512,32,0,0.07482560276985169
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,32,512,64,0,0.05348479747772217
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,32,512,2,0,0.7576479911804199
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,32,512,1,0,1.263766384124756
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,32,512,8,0,0.18527840375900267
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,32,512,4,0,0.32353920936584474
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,32,512,16,0,0.11701279878616333
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,32,512,2,0,0.5982704162597656
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,32,512,32,0,0.09282879829406739
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,32,512,64,0,0.06390399932861328
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,32,512,1,0,1.1517487525939942
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,32,1024,16,0,0.23209118843078613
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,32,1024,8,0,0.3953632116317749
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,32,1024,4,0,0.726966381072998
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,32,1024,64,0,0.10867359638214111
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,32,1024,32,0,0.1505519986152649
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,32,1024,2,0,1.3932623863220215
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,32,1024,8,0,0.4050432205200195
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,32,1024,4,0,0.7375872135162354
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,32,1024,16,0,0.24038240909576417
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,32,1024,32,0,0.15946240425109864
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,32,1024,64,0,0.116811203956604
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,32,1024,2,0,1.403435230255127
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,32,1024,1,0,2.7543296813964844
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,16,8192,2,0,19.92694549560547
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,32,1536,16,0,0.3979248046875
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,32,1536,8,0,0.6832528114318848
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,32,1536,32,0,0.3153872013092041
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,32,1536,4,0,1.2559295654296876
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,32,1024,1,0,2.8017295837402343
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,32,1536,64,0,0.18105440139770507
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,32,1536,2,0,2.4590288162231446
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,32,1536,16,0,0.4058383941650391
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,32,1536,8,0,0.7332191944122315
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,32,1536,64,0,0.18164479732513428
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,32,1536,32,0,0.26521599292755127
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,32,1536,4,0,1.2840383529663086
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,32,1536,2,0,2.4846336364746096
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,32,2048,8,0,1.003547191619873
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,32,1536,1,0,4.920161437988281
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,32,2048,64,0,0.2299567937850952
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,32,2048,32,0,0.3384687900543213
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,32,2048,16,0,0.5759376049041748
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,32,2048,4,0,1.9476160049438476
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,32,2048,16,0,0.614574384689331
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,32,1536,1,0,4.997470474243164
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,32,2048,8,0,1.0306639671325684
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,32,2048,2,0,3.7994705200195313
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,32,2048,32,0,0.37331039905548097
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,32,2048,64,0,0.2769599914550781
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,32,2048,4,0,1.923966407775879
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,32,2048,2,0,3.824537658691406
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,32,3072,16,0,1.029368019104004
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,32,3072,8,0,1.8800880432128906
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,32,3072,64,0,0.39755361080169677
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,32,3072,32,0,0.6382448196411132
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,32,2048,1,0,7.79669418334961
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,32,3072,4,0,3.627452850341797
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,32,3072,16,0,1.152984046936035
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,32,2048,1,0,7.8776802062988285
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,32,3072,8,0,2.0753183364868164
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,32,3072,32,0,0.6621888160705567
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,32,3072,64,0,0.4611663818359375
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,32,3072,4,0,3.600627136230469
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,32,3072,2,0,7.275614166259766
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,32,4096,16,0,1.6758560180664062
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,32,4096,32,0,1.0299216270446778
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,32,4096,64,0,0.6138063907623291
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,32,4096,8,0,3.0404272079467773
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,32,3072,2,0,7.340171051025391
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,32,4096,16,0,1.6926256179809571
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,32,4096,4,0,5.8797870635986325
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,32,4096,8,0,3.141758346557617
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,32,4096,32,0,0.9878447532653809
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,64,16,2,0,0.14261759519577027
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,64,16,1,0,0.25409278869628904
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,32,4096,64,0,0.6896143913269043
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,64,16,4,0,0.08143200278282166
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,64,16,8,0,0.047366398572921756
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,64,16,16,0,0.03341760039329529
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,64,16,32,0,0.022675199806690215
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,64,16,64,0,0.01866080015897751
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,64,16,2,0,0.12962559461593628
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,64,16,1,0,0.2845312118530273
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,64,16,4,0,0.08511360287666321
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,64,16,8,0,0.04714080095291138
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,32,4096,4,0,5.8872943878173825
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,32,3072,1,0,16.20374450683594
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,64,16,16,0,0.03711360096931458
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,64,16,32,0,0.026892799139022826
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,64,16,64,0,0.019990399479866028
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,64,32,1,0,0.3540992021560669
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,64,32,4,0,0.08678240180015565
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,64,32,2,0,0.18523999452590942
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,64,32,8,0,0.05094720125198364
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,64,32,16,0,0.03299840092658997
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,64,32,32,0,0.028836798667907716
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,64,32,2,0,0.15507359504699708
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,64,32,64,0,0.018731200695037843
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,64,32,1,0,0.32748000621795653
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,64,32,32,0,0.02595840096473694
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,64,32,4,0,0.08474720120429993
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,64,32,8,0,0.06372960209846497
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,64,32,16,0,0.03505760133266449
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,32,4096,2,0,12.059622192382813
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,64,64,4,0,0.13494720458984374
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,32,3072,1,0,16.25997314453125
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,64,32,64,0,0.021984000504016877
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,64,64,2,0,0.19743679761886596
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,64,64,1,0,0.3733472108840942
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,64,64,8,0,0.06271679997444153
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,64,64,32,0,0.03514559864997864
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,64,64,16,0,0.05171359777450561
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,64,64,64,0,0.024820800125598907
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,64,64,2,0,0.20362401008605957
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,64,64,1,0,0.377238392829895
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,64,64,4,0,0.1152959942817688
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,64,64,8,0,0.067084801197052
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,64,64,16,0,0.05602239966392517
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,64,64,32,0,0.033048000931739804
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,64,64,64,0,0.024903999269008638
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,64,128,2,0,0.2963119983673096
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,64,128,4,0,0.16221920251846314
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,32,4096,2,0,12.04445571899414
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,64,128,1,0,0.5622159957885742
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,64,128,8,0,0.0968671977519989
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,64,128,16,0,0.05866879820823669
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,64,128,32,0,0.039724799990653994
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,64,128,64,0,0.031035199761390686
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,64,128,4,0,0.16643840074539185
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,64,128,1,0,0.5670447826385498
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,64,128,8,0,0.09915519952774048
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,64,128,2,0,0.37069599628448485
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,64,128,16,0,0.06340320110321045
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,64,128,32,0,0.04490880072116852
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,64,128,64,0,0.03519839942455292
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,64,256,4,0,0.28799359798431395
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,64,256,2,0,0.5362703800201416
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,64,256,8,0,0.19747999906539918
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,64,256,16,0,0.10289920568466186
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,64,256,32,0,0.06946560144424438
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,64,256,1,0,1.0303567886352538
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,64,256,4,0,0.32188000679016116
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,64,256,64,0,0.04955520033836365
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,64,256,8,0,0.1705423951148987
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,64,256,16,0,0.1091536045074463
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,64,256,2,0,0.5420176029205322
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,64,256,32,0,0.073990398645401
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,64,256,64,0,0.06167839765548706
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,64,256,1,0,1.0383968353271484
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,64,512,32,0,0.13324320316314697
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,64,512,16,0,0.2021888017654419
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,64,512,64,0,0.09856799840927125
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,64,512,8,0,0.3381040096282959
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,64,512,4,0,0.6143263816833496
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,64,512,2,0,1.1663984298706054
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,64,512,16,0,0.21080639362335205
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,64,512,32,0,0.1409759998321533
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,64,512,8,0,0.3460095882415771
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,64,512,4,0,0.6202591896057129
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,64,512,2,0,1.1749296188354492
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,64,512,64,0,0.10462239980697632
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,64,512,1,0,2.278062438964844
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,64,1024,32,0,0.27564799785614014
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,64,1024,16,0,0.4384607791900635
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,64,1024,64,0,0.2014080047607422
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,64,1024,8,0,0.7694640159606934
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,64,512,1,0,2.292310333251953
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,64,1024,4,0,1.4399840354919433
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,64,1024,16,0,0.4747168064117432
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,64,1024,8,0,0.7847072124481201
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,64,1024,32,0,0.29991679191589354
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,64,1024,64,0,0.21477758884429932
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,64,1024,4,0,1.4639007568359375
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,64,1024,2,0,2.832151985168457
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,64,1536,16,0,0.7692192077636719
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,64,1024,2,0,2.8381824493408203
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,64,1536,8,0,1.332472038269043
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,64,1536,32,0,0.4620368003845215
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,64,1536,64,0,0.3279792070388794
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,64,1536,4,0,2.51046085357666
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,64,1024,1,0,5.681067276000976
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,64,1024,1,0,5.745030212402344
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,64,1536,16,0,0.8123680114746094
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,64,1536,8,0,1.5067520141601562
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,64,1536,64,0,0.3355839967727661
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,64,1536,32,0,0.5226304054260253
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,64,1536,2,0,5.120087814331055
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,64,1536,4,0,2.5616016387939453
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,64,2048,32,0,0.7055168151855469
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,64,2048,16,0,1.2005904197692872
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,64,2048,64,0,0.4707632064819336
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,64,2048,8,0,2.0187376022338865
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,64,1536,2,0,5.154792022705078
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,64,2048,4,0,3.9055057525634767
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,64,2048,16,0,1.1792400360107422
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,64,2048,8,0,2.0435104370117188
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,64,2048,32,0,0.7658639907836914
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,64,2048,64,0,0.46898560523986815
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,128,16,4,0,0.12954879999160768
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,128,16,2,0,0.26796159744262693
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,128,16,1,0,0.47737760543823243
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,128,16,8,0,0.07071679830551147
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,128,16,16,0,0.044424000382423404
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,128,16,32,0,0.03150239884853363
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,128,16,64,0,0.02075680047273636
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,64,1536,1,0,11.54007339477539
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,128,16,2,0,0.24914560317993165
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,128,16,1,0,0.45801920890808107
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,64,2048,4,0,3.9008720397949217
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,128,16,4,0,0.1365823984146118
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,128,16,8,0,0.07684959769248963
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,128,16,16,0,0.04930559992790222
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,128,16,64,0,0.025150400400161744
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,128,16,32,0,0.03691680133342743
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,128,32,2,0,0.29035680294036864
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,128,32,4,0,0.16552000045776366
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,128,32,1,0,0.5552351951599122
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,128,32,8,0,0.09666399955749512
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,128,32,16,0,0.05308640003204346
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,128,32,32,0,0.037031999230384825
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,64,2048,2,0,7.925091552734375
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,128,32,64,0,0.0268640011548996
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,64,1536,1,0,11.547293090820313
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,128,32,2,0,0.28608479499816897
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,128,32,4,0,0.16055840253829956
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,128,32,16,0,0.05556480288505554
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,128,32,1,0,0.6248271942138672
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,128,32,64,0,0.028814399242401124
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,128,32,8,0,0.09683679938316345
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,128,32,32,0,0.039243200421333314
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,128,64,8,0,0.11585439443588257
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,128,64,16,0,0.07527679800987244
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,128,64,4,0,0.20740799903869628
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,128,64,32,0,0.04740799963474274
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,128,64,2,0,0.3773535966873169
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,128,64,64,0,0.03476159870624542
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,128,64,1,0,0.730302381515503
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,128,64,2,0,0.38465280532836915
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,128,64,4,0,0.2127216100692749
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,128,64,1,0,0.7970143795013428
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,128,64,8,0,0.12135039567947388
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,64,2048,2,0,7.923158264160156
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,128,64,16,0,0.07428640127182007
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,128,64,32,0,0.051369601488113405
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,128,64,64,0,0.03913759887218475
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,128,128,4,0,0.3071104049682617
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,128,128,16,0,0.10560640096664428
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,128,128,2,0,0.5865392208099365
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,128,128,8,0,0.2042464017868042
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,128,128,32,0,0.07192000150680541
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,128,128,64,0,0.05139679908752441
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,128,128,1,0,1.1097599983215332
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,128,128,4,0,0.31308159828186033
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,128,128,16,0,0.11081440448760986
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,128,128,8,0,0.21104319095611573
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,128,128,2,0,0.5786287784576416
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,128,128,32,0,0.07655839920043946
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,128,128,64,0,0.05759199857711792
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,128,128,1,0,1.1080368041992188
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,128,256,8,0,0.3094192028045654
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,128,256,16,0,0.1850559949874878
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,128,256,4,0,0.6015071868896484
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,128,256,32,0,0.12349920272827149
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,128,256,64,0,0.09019680023193359
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,128,256,2,0,1.0514240264892578
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,128,256,8,0,0.3271647930145264
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,128,256,16,0,0.19312479496002197
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,128,256,32,0,0.13280800580978394
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,128,256,64,0,0.09732319712638855
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,128,256,4,0,0.5628208160400391
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,128,256,1,0,2.0424383163452147
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,128,256,2,0,1.0580927848815918
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,128,512,16,0,0.3809231996536255
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,128,512,64,0,0.1764240026473999
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,128,512,32,0,0.24604480266571044
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,128,512,8,0,0.6578271865844727
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,128,512,4,0,1.2096847534179687
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,128,256,1,0,2.050271987915039
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,128,512,16,0,0.3954096078872681
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,128,512,32,0,0.25903360843658446
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,128,512,2,0,2.325651168823242
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,128,512,8,0,0.6709472179412842
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,128,512,64,0,0.18810399770736694
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,128,512,4,0,1.224955177307129
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,128,512,2,0,2.3313104629516603
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,128,1024,16,0,0.8598320007324218
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,128,1024,32,0,0.5278096199035645
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,128,1024,64,0,0.3721487998962402
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,128,1024,8,0,1.5268192291259766
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,128,512,1,0,4.641320037841797
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,128,1024,16,0,0.8996623992919922
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,128,1024,4,0,2.9102527618408205
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,128,512,1,0,4.649996948242188
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,128,1024,32,0,0.5712063789367676
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,128,1024,64,0,0.3907840013504028
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,256,16,4,0,0.2400576114654541
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,256,16,8,0,0.12728480100631714
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,128,1024,8,0,1.5563183784484864
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,256,16,2,0,0.4659152030944824
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,256,16,16,0,0.0736624002456665
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,256,16,32,0,0.04745280146598816
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,256,16,1,0,0.9497584342956543
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,256,16,64,0,0.032996800541877744
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,256,16,4,0,0.23865599632263185
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,256,16,2,0,0.4476304054260254
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,256,16,8,0,0.13632160425186157
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,256,16,16,0,0.07714080214500427
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,128,1024,4,0,2.896820831298828
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,256,16,1,0,0.8733599662780762
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,256,16,32,0,0.04962719976902008
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,256,16,64,0,0.03705599904060364
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,128,1024,2,0,5.749947357177734
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,256,32,4,0,0.2871824026107788
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,256,32,8,0,0.15760960578918456
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,256,32,16,0,0.09126080274581909
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,256,32,2,0,0.5597360134124756
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,256,32,64,0,0.04115679860115051
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,256,32,32,0,0.06369119882583618
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,256,32,1,0,1.0594719886779784
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,256,32,4,0,0.29414560794830324
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,256,32,8,0,0.16292639970779418
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,256,32,2,0,0.554695987701416
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,256,32,16,0,0.10684959888458252
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,256,32,1,0,1.063582420349121
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,256,32,32,0,0.060052800178527835
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,256,32,64,0,0.0450111985206604
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,128,1024,2,0,5.782702255249023
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,256,64,8,0,0.21520159244537354
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,256,64,4,0,0.3924976110458374
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,256,64,16,0,0.12770240306854247
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,256,64,32,0,0.0805616021156311
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,256,64,2,0,0.7376736164093017
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,256,64,64,0,0.06582080125808716
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,256,64,1,0,1.4378111839294434
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,256,64,8,0,0.22168641090393065
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,256,64,16,0,0.13377280235290528
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,256,64,4,0,0.3956671953201294
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,256,64,32,0,0.09587200284004212
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,256,64,2,0,0.7435152053833007
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,256,64,64,0,0.06204479932785034
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,256,64,1,0,1.442319965362549
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,256,128,16,0,0.19468319416046143
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,256,128,32,0,0.12898080348968505
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,256,128,8,0,0.3360752105712891
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,256,128,4,0,0.5921840190887451
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,256,128,64,0,0.09237599968910218
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,256,128,2,0,1.1231840133666993
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,256,128,16,0,0.20348799228668213
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,256,128,8,0,0.33523359298706057
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,256,128,32,0,0.13425920009613038
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,256,128,64,0,0.10051039457321168
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,256,128,4,0,0.5992976188659668
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,256,128,2,0,1.136783981323242
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,256,128,1,0,2.1861440658569338
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,256,256,32,0,0.22762720584869384
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,256,256,8,0,0.6008880138397217
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,256,256,16,0,0.3524303913116455
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,256,256,64,0,0.16795040369033815
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,256,128,1,0,2.1896383285522463
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,256,256,4,0,1.093393611907959
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,256,256,2,0,2.082558441162109
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,256,256,32,0,0.24116160869598388
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,256,256,16,0,0.36503679752349855
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,256,256,8,0,0.6126016139984131
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,256,256,4,0,1.1060352325439453
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,256,256,64,0,0.18077119588851928
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,4,256,512,32,0,0.4693136215209961
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,256,256,2,0,2.0958351135253905
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,128,256,256,1,0,4.078545761108399
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,2,256,512,64,0,0.33228800296783445
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,16,256,512,8,0,1.301427173614502
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,8,256,512,16,0,0.7432591915130615
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,4,256,512,32,0,0.49210400581359864
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,128,256,256,1,0,4.093793487548828
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,32,256,512,4,0,2.4043312072753906
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,8,256,512,16,0,0.7673791885375977
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,16,256,512,8,0,1.3212464332580567
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,2,256,512,64,0,0.355511999130249
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,32,256,512,4,0,2.445822334289551
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,float16,64,256,512,2,0,4.716083145141601
SGLang,0.5.6.post2,NVIDIA B200,mla_context,trtllm_mla,float16,fp8,64,256,512,2,0,4.7365470886230465
