framework,version,device,op_name,kernel_source,mla_dtype,kv_cache_dtype,num_heads,batch_size,isl,tp_size,step,latency
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,2,1,0.010836800187826156
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,4,1,0.010444799810647965
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,8,1,0.010377600044012069
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,1,1,0.010520000010728836
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1,1,64,1,0.009859199821949004
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,16,1,0.009107200056314468
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,32,1,0.010444799810647965
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,1,3,0.010515200346708298
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,2,3,0.010979200154542923
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,4,3,0.010401599854230881
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,8,3,0.010390400141477584
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,16,3,0.010422399640083313
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,32,3,0.010390400141477584
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1,1,64,3,0.010369600355625152
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,16,1,0.008591999858617782
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,1,1,0.01045600026845932
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,8,1,0.008446399867534638
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,4,1,0.010409600287675857
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,2,3,0.010419200360774993
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,1,3,0.010406400263309478
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1,1,64,1,0.008671999722719193
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1,1,64,3,0.01040479987859726
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,4,3,0.0104032002389431
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,8,3,0.009393599629402161
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,16,3,0.010367999970912933
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,32,3,0.008451200276613235
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,2,1,0.010428799688816071
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,32,1,0.008495999872684479
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,2,7,0.011313600093126297
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,2,7,0.010505600273609162
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,1,7,0.010518400371074677
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,1,7,0.010518400371074677
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,4,7,0.010487999767065048
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,4,7,0.010523200035095215
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,16,7,0.008537600189447403
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,8,7,0.010675200074911118
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,32,7,0.010452800244092942
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,16,7,0.010412800312042236
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,32,7,0.010572800040245056
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,8,7,0.008964800089597703
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1,1,64,7,0.010388799756765366
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1,1,64,7,0.010396800190210342
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,2,15,0.010673599690198899
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,4,15,0.010475199669599533
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,2,15,0.010475199669599533
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,4,15,0.010494399815797806
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,1,15,0.010558400303125381
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,16,15,0.00981760025024414
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,8,15,0.010473600029945374
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1,1,64,15,0.009176000207662582
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,16,15,0.010441599786281586
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,32,15,0.008472000062465668
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,8,15,0.00945120006799698
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,1,15,0.010416000336408614
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,32,15,0.010412800312042236
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1,1,64,15,0.010414399951696397
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,1,31,0.011311999708414077
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,2,31,0.01061279997229576
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,2,31,0.010382399708032609
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,4,31,0.010395199805498124
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,4,31,0.00958240032196045
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,8,31,0.010435199737548828
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,16,31,0.010417599976062775
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,32,31,0.008470399677753449
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1,1,64,31,0.00843840017914772
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,32,31,0.01048320010304451
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,8,31,0.010393600165843963
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1,1,64,31,0.010367999970912933
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,16,31,0.010401599854230881
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,1,31,0.01043199971318245
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,1,63,0.011611200124025344
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,2,63,0.010411199927330018
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,2,63,0.010496000200510025
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,4,63,0.010407999902963639
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1,1,64,63,0.009318400174379349
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,8,63,0.01064639985561371
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,1,63,0.010406400263309478
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,4,63,0.010564800351858139
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,8,63,0.008716800063848496
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,32,63,0.010422399640083313
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,16,63,0.010387200117111205
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,16,63,0.01040479987859726
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,32,63,0.008432000130414962
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1,1,64,63,0.010480000078678131
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,2,127,0.010475199669599533
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,4,127,0.010393600165843963
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,1,127,0.011449600011110306
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,8,127,0.010470400005578995
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,4,127,0.010391999781131745
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,16,127,0.010436800122261048
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,16,127,0.008568000048398972
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,32,127,0.010391999781131745
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,2,127,0.01040479987859726
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1,1,64,127,0.010371199995279311
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,8,127,0.010411199927330018
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,32,127,0.008568000048398972
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1,1,64,127,0.009382399916648864
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,1,127,0.010411199927330018
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,1,255,0.010585600137710571
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,2,255,0.011327999830245971
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,2,255,0.010366400331258773
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,8,255,0.01048479974269867
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,4,255,0.01048159971833229
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,8,255,0.010419200360774993
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,16,255,0.008580800145864487
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,4,255,0.010364799946546554
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,1,255,0.010441599786281586
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1,1,64,255,0.01043199971318245
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,32,255,0.01043040007352829
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,16,255,0.01043199971318245
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,32,255,0.009286399930715561
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1,1,64,255,0.008619199693202972
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,1,511,0.012964800000190735
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,2,511,0.012876799702644348
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,16,511,0.012459199875593185
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,4,511,0.012545600533485413
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,1,511,0.012558400630950928
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,8,511,0.011947199702262878
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,2,511,0.012542399764060973
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,32,511,0.010521599650382995
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,16,511,0.010595200210809707
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1,1,64,511,0.010579200088977813
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,4,511,0.012569600343704223
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,32,511,0.011124800145626067
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,8,511,0.01053600013256073
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1,1,64,511,0.012460800260305405
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,1,1023,0.013504000008106231
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,1,1023,0.01252480000257492
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,4,1023,0.0124208003282547
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,2,1023,0.012624000012874604
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,8,1023,0.010868799686431885
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,16,1023,0.012465599924325943
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,4,1023,0.012569600343704223
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,2,1023,0.01255200058221817
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,16,1023,0.010627199709415436
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,32,1023,0.011416000127792359
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,8,1023,0.012548799812793731
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1,1,64,1023,0.012507200241088867
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1,1,64,1023,0.01247360035777092
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,32,1023,0.010892800241708755
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,1,2047,0.01653759926557541
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,4,2047,0.012835200130939483
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,8,2047,0.012535999715328216
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,4,2047,0.012600000202655792
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,2,2047,0.014603200554847717
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,16,2047,0.012567999958992004
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,1,2047,0.013769599795341491
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,32,2047,0.012542399764060973
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,8,2047,0.012430399656295776
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,2,2047,0.0125231996178627
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,32,2047,0.01260959953069687
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1,1,64,2047,0.012432000041007996
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,16,2047,0.012428800016641617
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1,1,64,2047,0.012470400333404541
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,2,4095,0.01666080057621002
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,4,4095,0.014601600170135499
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,1,4095,0.018804800510406495
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,4,4095,0.01467359960079193
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,2,4095,0.01462399959564209
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,16,4095,0.01363999992609024
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,32,4095,0.014532800018787383
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,8,4095,0.012755200266838074
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1,1,64,4095,0.014468799531459808
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,16,4095,0.013484799861907959
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,8,4095,0.012620800733566284
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,32,4095,0.012852799892425538
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1,1,64,4095,0.013686400651931763
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,1,4095,0.016543999314308167
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,4,8191,0.01886879950761795
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,2,8191,0.020656000077724456
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,8,8191,0.016612799465656282
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,2,8191,0.017239999771118165
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1,1,64,8191,0.01671839952468872
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,1,8191,0.017420800030231477
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,16,8191,0.016697600483894348
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,1,8191,0.020716799795627593
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,16,8191,0.016756799817085267
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,32,8191,0.016944000124931337
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,4,8191,0.01669120043516159
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,32,8191,0.016579200327396394
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,8,8191,0.016407999396324157
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1,1,64,8191,0.01666560024023056
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,1,16383,0.026868799328804018
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,2,16383,0.02062560021877289
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,4,16383,0.022750400006771088
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,4,16383,0.020703999698162077
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,8,16383,0.021052800118923187
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,2,16383,0.024609600007534028
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,32,16383,0.01879200041294098
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,8,16383,0.018743999302387238
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,16,16383,0.02085919976234436
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,16,16383,0.018697600066661834
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,32,16383,0.022163200378417968
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1,1,64,16383,0.020817600190639496
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1,1,64,16383,0.018780800700187682
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,1,16383,0.02073120027780533
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,4,32767,0.028641599416732787
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,16,32767,0.026678401231765746
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,1,32767,0.041247999668121337
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,32,32767,0.02650879919528961
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,1,32767,0.02728480100631714
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,8,32767,0.026833599805831908
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,8,32767,0.02483679950237274
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,2,32767,0.03086720108985901
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,2,32767,0.024905599653720856
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,32,32767,0.024747200310230255
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,4,32767,0.024798400700092316
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1,1,64,32767,0.026796799898147584
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,16,32767,0.024059200286865236
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1,1,64,32767,0.02441119998693466
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,1,65535,0.0397136002779007
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,1,65535,0.031121599674224853
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,4,65535,0.035662400722503665
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,2,65535,0.02890079915523529
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,16,65535,0.035051199793815616
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,4,65535,0.02881920039653778
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,2,65535,0.043329599499702456
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,8,65535,0.028964799642562867
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,8,65535,0.03471679985523224
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,16,65535,0.02747359871864319
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,32,65535,0.02680639922618866
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1,1,64,65535,0.027088001370429993
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,32,65535,0.037241598963737486
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1,1,64,65535,0.03684639930725098
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,1,131071,0.059595197439193726
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,2,131071,0.07037919759750366
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,1,131071,0.039156800508499144
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,4,131071,0.05338240265846252
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,4,131071,0.035046398639678955
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,16,131071,0.05541920065879822
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,32,131071,0.05536320209503174
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,16,131071,0.033214399218559267
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,1,1,0.011865600198507308
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,8,131071,0.05303040146827698
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1,1,64,131071,0.055379199981689456
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,32,131071,0.032974401116371156
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,8,131071,0.03368319869041443
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,2,131071,0.04161440134048462
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1,1,64,131071,0.03477280139923096
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,2,1,0.010489600151777268
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,4,1,0.010526400059461594
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,8,1,0.010571199655532836
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,16,1,0.010470400005578995
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,32,1,0.010507199913263321
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,2,1,64,1,0.010396800190210342
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,2,1,0.010496000200510025
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,1,3,0.010467199981212616
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,4,1,0.010416000336408614
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,1,1,0.010419200360774993
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,8,1,0.01040479987859726
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,16,1,0.010353600233793258
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,32,3,0.010364799946546554
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,32,1,0.010409600287675857
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,2,1,64,1,0.008433599770069123
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,2,3,0.010699199885129929
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,4,3,0.010441599786281586
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,8,3,0.010492800176143647
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,16,3,0.010576000064611435
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,1,3,0.01043040007352829
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,2,3,0.010395199805498124
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,2,1,64,3,0.0104032002389431
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,4,3,0.010463999956846238
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,8,3,0.009161599725484849
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,8,7,0.01067039966583252
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,16,3,0.008846399933099746
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,32,3,0.010441599786281586
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,1,7,0.010648000240325927
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,2,1,64,3,0.010387200117111205
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,2,7,0.010513599961996078
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,16,7,0.010595200210809707
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,4,7,0.01133280023932457
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,32,7,0.010406400263309478
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,2,1,64,7,0.010375999659299851
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,1,7,0.010396800190210342
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,2,7,0.010491199791431427
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,8,7,0.010414399951696397
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,16,7,0.008457600325345992
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,8,15,0.010419200360774993
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,4,7,0.010385599732398988
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,32,15,0.010435199737548828
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,2,1,64,7,0.010478399693965912
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,1,15,0.010608000308275222
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,32,7,0.010385599732398988
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,2,15,0.011816000193357467
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,4,15,0.011366400122642516
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,16,15,0.01048159971833229
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,1,15,0.010417599976062775
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,2,15,0.010499200224876404
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,4,15,0.010420800000429154
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,2,1,64,15,0.010416000336408614
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,16,15,0.010360000282526016
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,32,15,0.010462400317192078
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,8,15,0.010387200117111205
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,2,1,64,15,0.008739200234413148
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,1,31,0.010473600029945374
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,4,31,0.010471999645233154
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,2,31,0.010705599933862687
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,16,31,0.01037919968366623
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,32,31,0.010496000200510025
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,8,31,0.010375999659299851
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,1,31,0.010524799674749374
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,2,31,0.010419200360774993
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,2,1,64,31,0.010468800365924836
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,4,31,0.010529600083827972
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,16,31,0.010380800068378448
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,8,31,0.009491200000047684
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,32,31,0.010425599664449692
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,2,63,0.010486400127410889
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,2,1,64,31,0.00878399983048439
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,1,63,0.011955200135707856
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,4,63,0.010499200224876404
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,8,63,0.010391999781131745
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,16,63,0.010553599894046783
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,1,63,0.010521599650382995
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,32,63,0.0104032002389431
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,2,63,0.010419200360774993
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,2,1,64,63,0.01034879982471466
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,4,63,0.010388799756765366
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,8,63,0.010367999970912933
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,16,63,0.00888800024986267
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,32,63,0.009019199758768082
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,2,1,64,63,0.010412800312042236
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,4,127,0.010470400005578995
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,1,127,0.011521600186824799
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,2,127,0.010478399693965912
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,8,127,0.010500799864530563
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,16,127,0.010473600029945374
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,32,127,0.010480000078678131
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,2,1,64,127,0.010424000024795533
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,1,127,0.010446400195360184
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,2,127,0.010460799932479859
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,4,127,0.010367999970912933
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,8,127,0.010451199859380722
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,16,127,0.008441600203514098
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,1,255,0.010766399651765823
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,32,127,0.008551999926567078
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,2,1,64,127,0.010500799864530563
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,2,255,0.010969600081443787
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,4,255,0.010993599891662598
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,32,255,0.009985599666833878
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,8,255,0.010463999956846238
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,16,255,0.010441599786281586
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,2,1,64,255,0.010427200049161912
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,1,255,0.010639999806880952
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,8,255,0.008481600135564805
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,16,255,0.010412800312042236
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,32,255,0.009560000151395798
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,2,255,0.010367999970912933
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,4,255,0.010452800244092942
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,2,511,0.012495999783277511
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,2,1,64,255,0.00841279998421669
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,1,511,0.012643200159072877
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,4,511,0.0125231996178627
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,32,511,0.010592000186443329
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,8,511,0.010628800094127654
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,2,1,64,511,0.010447999835014344
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,16,511,0.010516799986362457
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,1,511,0.010471999645233154
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,32,511,0.010476800054311753
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,2,511,0.010503999888896942
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,4,511,0.010569600015878677
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,8,511,0.010507199913263321
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,16,511,0.01045759990811348
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,2,1023,0.012511999905109405
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,4,1023,0.01244639977812767
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,2,1,64,511,0.010462400317192078
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,1,1023,0.014427199959754944
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,8,1023,0.01064160019159317
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,16,1023,0.010396800190210342
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,32,1023,0.010550399869680404
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,1,1023,0.012516799569129943
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,2,1023,0.010497599840164185
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,2,1,64,1023,0.010494399815797806
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,4,1023,0.010543999820947647
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,8,1023,0.0104592002928257
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,16,1023,0.010395199805498124
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,1,2047,0.015411199629306793
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,32,1023,0.010566399991512298
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,2,2047,0.014587199687957764
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,2,1,64,1023,0.010411199927330018
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,4,2047,0.012542399764060973
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,8,2047,0.010622400045394897
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,2,1,64,2047,0.010416000336408614
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,16,2047,0.010463999956846238
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,32,2047,0.010516799986362457
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,2,2047,0.01446239948272705
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,1,2047,0.01451359987258911
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,4,2047,0.010548800230026245
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,32,2047,0.010467199981212616
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,8,2047,0.010460799932479859
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,16,2047,0.010447999835014344
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,2,1,64,2047,0.010582400113344192
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,2,4095,0.016607999801635742
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,1,4095,0.018587200343608855
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,2,1,64,4095,0.014742399752140044
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,16,4095,0.014476799964904785
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,4,4095,0.016673600673675536
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,8,4095,0.014580799639225006
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,32,4095,0.014550399780273438
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,2,4095,0.014667199552059173
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,8,4095,0.014617599546909332
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,1,4095,0.016622400283813475
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,2,8191,0.020664000511169435
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,16,4095,0.012540799379348756
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,4,4095,0.014705599844455719
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,32,4095,0.01446560025215149
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,2,1,64,4095,0.014496000111103058
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,1,8191,0.022896000742912294
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,8,8191,0.018670399487018586
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,16,8191,0.016729600727558136
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,4,8191,0.018822400271892546
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,32,8191,0.016832000017166136
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,2,1,64,8191,0.0168272003531456
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,4,8191,0.01666239947080612
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,1,8191,0.018692800402641298
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,8,8191,0.016543999314308167
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,2,8191,0.018590399622917177
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,16,8191,0.015771199762821198
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,1,16383,0.03225919902324677
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,32,8191,0.016652800142765045
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,2,16383,0.026763200759887695
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,2,1,64,8191,0.016568000614643096
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,4,16383,0.02468319982290268
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,16,16383,0.02279680073261261
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,32,16383,0.02279040068387985
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,8,16383,0.022862400114536285
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,1,16383,0.022694399952888487
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,2,1,64,16383,0.022651199996471406
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,4,16383,0.02131839990615845
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,8,16383,0.02080480009317398
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,16,16383,0.02088160067796707
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,2,32767,0.03714880049228668
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,8,32767,0.029423999786376952
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,2,16383,0.021411199867725373
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,32,16383,0.02065120041370392
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,1,32767,0.037031999230384825
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,4,32767,0.030987200140953065
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,2,1,64,16383,0.020729599893093108
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,16,32767,0.029145601391792297
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,2,1,64,32767,0.028841599822044373
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,2,32767,0.02680160105228424
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,32,32767,0.02884959876537323
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,4,32767,0.025839999318122864
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,1,32767,0.030958399176597595
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,8,32767,0.026035198569297792
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,16,32767,0.024878400564193725
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,32,32767,0.02478239983320236
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,2,1,64,32767,0.024910399317741395
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,1,65535,0.05736799836158753
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,2,65535,0.05633760094642639
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,4,65535,0.05246880054473877
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,8,65535,0.052457600831985474
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,16,65535,0.05470719933509827
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,32,65535,0.05324000120162964
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,2,1,64,65535,0.05249119997024536
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,1,65535,0.03533119857311249
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,2,65535,0.033134400844573975
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,4,65535,0.030342400074005127
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,8,65535,0.03014560043811798
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,16,65535,0.030267199873924254
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,32,65535,0.02974080145359039
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,2,1,64,65535,0.02892000079154968
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,1,131071,0.08411359786987305
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,2,1,64,131071,0.07887840270996094
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,2,131071,0.0923200011253357
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,4,131071,0.07796480059623719
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,8,131071,0.0781599998474121
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,16,131071,0.08011839985847473
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,32,131071,0.07938560247421264
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,1,131071,0.05339199900627136
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,2,131071,0.05159040093421936
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,2,1,0.010503999888896942
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,4,131071,0.04641599953174591
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,8,131071,0.04533919990062714
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,16,131071,0.04755040109157562
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,32,131071,0.046768000721931456
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,2,1,64,131071,0.04719040095806122
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,1,1,0.01247360035777092
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,4,1,0.010550399869680404
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,8,1,0.010499200224876404
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,16,1,0.010385599732398988
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,32,1,0.010463999956846238
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,4,1,64,1,0.010438399761915207
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,1,1,0.01061440035700798
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,2,1,0.010475199669599533
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,4,1,0.0104032002389431
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,8,1,0.009988799691200256
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,8,3,0.010480000078678131
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,16,1,0.01048320010304451
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,32,3,0.01048320010304451
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,32,1,0.0104032002389431
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,4,1,64,1,0.009415999799966813
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,1,3,0.012467200309038163
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,2,3,0.010567999631166457
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,4,3,0.011710400134325028
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,16,3,0.010424000024795533
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,16,3,0.010416000336408614
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,32,3,0.01050880029797554
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,4,1,64,3,0.010391999781131745
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,1,3,0.010486400127410889
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,2,3,0.010566399991512298
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,4,3,0.010401599854230881
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,8,3,0.01043199971318245
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,4,1,64,3,0.010497599840164185
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,1,7,0.010807999968528747
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,2,7,0.011481600254774094
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,4,7,0.01088479980826378
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,16,7,0.010527999699115753
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,32,7,0.01061440035700798
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,4,1,64,7,0.010409600287675857
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,8,7,0.010576000064611435
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,1,7,0.010454399883747101
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,2,7,0.010427200049161912
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,4,7,0.010505600273609162
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,2,15,0.010556799918413162
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,8,7,0.010401599854230881
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,16,7,0.010384000092744827
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,32,7,0.010380800068378448
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,4,1,64,7,0.010224000364542008
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,1,15,0.010751999914646149
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,4,15,0.010635200142860412
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,8,15,0.01056160032749176
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,4,15,0.010571199655532836
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,16,15,0.010451199859380722
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,32,15,0.010366400331258773
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,32,15,0.010443200170993806
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,4,1,64,15,0.01037440001964569
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,1,15,0.010371199995279311
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,2,15,0.010494399815797806
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,8,15,0.0104032002389431
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,16,15,0.010520000010728836
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,16,31,0.01048159971833229
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,4,1,64,15,0.010388799756765366
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,1,31,0.01051200032234192
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,2,31,0.01175519973039627
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,4,31,0.010507199913263321
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,8,31,0.010603199899196624
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,32,31,0.010491199791431427
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,4,1,64,31,0.010480000078678131
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,1,31,0.010547199845314026
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,2,31,0.01048320010304451
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,4,31,0.010375999659299851
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,8,31,0.010361599922180175
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,16,31,0.00933919996023178
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,4,1,64,31,0.010411199927330018
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,1,63,0.012588800489902496
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,32,31,0.009507200121879578
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,1,63,0.010471999645233154
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,2,63,0.010628800094127654
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,4,63,0.010555200278759003
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,8,63,0.010422399640083313
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,16,63,0.010369600355625152
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,32,63,0.010388799756765366
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,4,1,64,63,0.010425599664449692
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,1,127,0.010689599812030793
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,2,63,0.010407999902963639
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,8,63,0.010396800190210342
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,16,63,0.010446400195360184
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,4,63,0.010375999659299851
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,32,63,0.010388799756765366
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,4,1,64,63,0.010451199859380722
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,2,127,0.010992000252008438
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,4,127,0.010992000252008438
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,8,127,0.0104032002389431
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,16,127,0.010377600044012069
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,16,127,0.009017600119113922
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,32,127,0.01040479987859726
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,1,127,0.010503999888896942
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,2,127,0.010463999956846238
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,4,1,64,127,0.010380800068378448
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,4,127,0.010496000200510025
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,8,127,0.010401599854230881
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,4,1,64,127,0.00979039967060089
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,1,255,0.0112527996301651
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,32,127,0.010407999902963639
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,2,255,0.010505600273609162
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,4,255,0.011494400352239609
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,16,255,0.010545600205659866
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,32,255,0.010372799634933472
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,8,255,0.010500799864530563
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,4,1,64,255,0.010395199805498124
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,1,255,0.010478399693965912
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,4,255,0.010369600355625152
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,2,255,0.010416000336408614
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,8,255,0.0104032002389431
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,16,255,0.01048479974269867
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,32,255,0.010384000092744827
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,1,511,0.01356479972600937
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,4,1,64,255,0.0104032002389431
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,4,511,0.012516799569129943
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,2,511,0.01249919980764389
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,16,511,0.010831999778747558
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,32,511,0.010463999956846238
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,8,511,0.012441600114107132
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,4,1,64,511,0.01055999994277954
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,1,511,0.012470400333404541
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,4,511,0.010521599650382995
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,2,511,0.012428800016641617
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,16,511,0.010496000200510025
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,32,511,0.010476800054311753
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,8,511,0.01051200032234192
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,1,1023,0.014590400457382201
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,2,1023,0.01462240070104599
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,4,1,64,511,0.010417599976062775
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,4,1023,0.012467200309038163
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,16,1023,0.0104592002928257
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,8,1023,0.012462399899959564
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,4,1,64,1023,0.010468800365924836
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,1,1023,0.012572799623012543
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,32,1023,0.0104592002928257
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,4,1023,0.010467199981212616
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,8,1023,0.010467199981212616
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,2,1023,0.012470400333404541
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,32,1023,0.010460799932479859
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,4,1,64,1023,0.010572800040245056
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,16,1023,0.010436800122261048
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,2,2047,0.016628800332546233
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,4,2047,0.014812800288200378
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,1,2047,0.016604800522327424
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,8,2047,0.014678399264812469
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,16,2047,0.013558399677276612
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,32,2047,0.013801600039005279
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,4,1,64,2047,0.014519999921321868
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,32,2047,0.012465599924325943
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,1,2047,0.01459839940071106
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,2,2047,0.014499199390411378
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,8,2047,0.012467200309038163
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,16,2047,0.012535999715328216
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,4,2047,0.014569599926471711
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,4,1,64,2047,0.012676799297332763
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,1,4095,0.022745600342750548
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,2,4095,0.01857759952545166
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,4,4095,0.016596800088882445
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,8,4095,0.0165583997964859
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,16,4095,0.015123200416564942
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,32,4095,0.015048000216484069
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,4,1,64,4095,0.014678399264812469
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,1,4095,0.018632000684738158
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,2,4095,0.016574400663375854
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,4,4095,0.014591999351978302
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,8,4095,0.014644800126552582
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,16,4095,0.014480000734329224
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,32,4095,0.01459839940071106
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,4,1,64,4095,0.014494399726390838
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,2,8191,0.02332960069179535
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,4,8191,0.020729599893093108
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,1,8191,0.031011199951171874
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,8,8191,0.02062560021877289
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,16,8191,0.018668800592422485
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,32,8191,0.018691200017929076
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,4,1,64,8191,0.018745599687099455
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,1,8191,0.022780799865722658
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,4,8191,0.01743679940700531
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,2,16383,0.035366401076316833
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,16,8191,0.016873599588871004
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,2,8191,0.01894879937171936
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,32,8191,0.016545599699020384
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,8,8191,0.016710400581359863
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,4,1,64,8191,0.016672000288963318
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,1,16383,0.03287839889526367
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,4,16383,0.026846399903297423
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,8,16383,0.025201600790023804
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,16,16383,0.026577600836753847
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,32,16383,0.02476480007171631
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,2,16383,0.023108799755573273
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,4,1,64,16383,0.022920000553131103
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,4,16383,0.021305599808692934
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,1,16383,0.0278656005859375
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,8,16383,0.020703999698162077
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,16,16383,0.020929600298404693
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,32,16383,0.02077440023422241
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,4,1,64,16383,0.0208079993724823
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,32,32767,0.04746400117874146
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,1,32767,0.05150560140609741
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,4,32767,0.0488864004611969
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,2,32767,0.058894401788711546
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,16,32767,0.04936639964580536
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,8,32767,0.04775039851665497
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,4,1,64,32767,0.04794879853725433
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,1,32767,0.031892800331115724
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,2,32767,0.03304159939289093
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,4,32767,0.02619839906692505
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,2,65535,0.09506880044937134
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,8,32767,0.024931199848651886
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,32,32767,0.024830399453639983
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,16,32767,0.02483839988708496
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,1,65535,0.07930560111999511
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,4,1,64,32767,0.024833600223064422
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,4,65535,0.07402880191802978
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,8,65535,0.07278079986572265
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,1,65535,0.04929440021514893
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,8,65535,0.04155359864234924
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,16,65535,0.04247199892997742
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,16,65535,0.07341920137405396
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,32,65535,0.07241920232772828
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,4,1,64,65535,0.07184640169143677
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,1,131071,0.1328495979309082
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,2,65535,0.051844799518585206
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,4,65535,0.04312959909439087
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,4,1,64,65535,0.041387200355529785
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,32,65535,0.04177919924259186
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,2,131071,0.16546080112457276
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,4,131071,0.12373440265655518
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,16,131071,0.12358720302581787
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,8,131071,0.12511359453201293
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,32,131071,0.1244655966758728
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,4,1,64,131071,0.123089599609375
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,2,131071,0.08407679796218873
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,4,131071,0.06465920209884643
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,8,131071,0.06305440068244934
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,1,131071,0.07185760140419006
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,16,131071,0.06427839994430543
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,32,131071,0.06374880075454711
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,1,1,0.012441600114107132
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,2,1,0.012425599992275238
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,4,1,64,131071,0.06377120018005371
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,4,1,0.010513599961996078
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,8,1,0.0105103999376297
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,16,1,0.010507199913263321
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,32,1,0.010523200035095215
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,8,1,64,1,0.010353600233793258
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,1,1,0.012460800260305405
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,4,1,0.010524799674749374
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,8,1,0.010369600355625152
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,2,1,0.0104592002928257
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,16,1,0.010382399708032609
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,32,1,0.010500799864530563
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,8,1,64,1,0.010393600165843963
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,16,3,0.010427200049161912
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,2,3,0.012460800260305405
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,1,3,0.012542399764060973
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,4,3,0.01040000021457672
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,4,3,0.01101439967751503
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,16,3,0.010491199791431427
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,8,3,0.010436800122261048
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,32,3,0.010478399693965912
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,8,1,64,3,0.010428799688816071
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,1,3,0.010592000186443329
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,8,3,0.009470400214195252
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,2,3,0.010424000024795533
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,32,3,0.0104032002389431
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,8,1,64,3,0.010393600165843963
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,1,7,0.01252799928188324
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,2,7,0.01244639977812767
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,4,7,0.010436800122261048
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,8,7,0.010454399883747101
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,8,7,0.010444799810647965
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,16,7,0.010507199913263321
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,32,7,0.010436800122261048
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,8,1,64,7,0.010603199899196624
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,1,7,0.012417600303888322
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,2,7,0.010593599826097488
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,4,7,0.010385599732398988
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,16,7,0.0104032002389431
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,32,7,0.010420800000429154
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,8,1,64,7,0.010294400155544281
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,1,15,0.012449599802494049
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,2,15,0.010521599650382995
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,4,15,0.012044800072908401
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,8,15,0.010395199805498124
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,16,15,0.010620799660682679
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,32,15,0.010409600287675857
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,8,1,64,15,0.010499200224876404
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,1,15,0.011393599957227708
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,2,15,0.010486400127410889
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,4,15,0.010499200224876404
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,8,15,0.010414399951696397
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,16,15,0.010380800068378448
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,32,15,0.010454399883747101
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,8,1,64,15,0.010407999902963639
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,1,31,0.012486399710178375
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,2,31,0.010768000036478043
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,4,31,0.010593599826097488
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,8,31,0.010521599650382995
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,16,31,0.010449600219726563
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,32,31,0.010545600205659866
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,8,1,64,31,0.010356800258159637
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,1,31,0.010660800337791442
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,1,63,0.012467200309038163
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,2,31,0.01043360009789467
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,4,31,0.010451199859380722
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,8,31,0.010497599840164185
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,16,31,0.010499200224876404
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,32,31,0.010375999659299851
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,8,1,64,31,0.010361599922180175
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,2,63,0.01255200058221817
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,4,63,0.010553599894046783
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,8,63,0.010523200035095215
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,16,63,0.01048479974269867
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,32,63,0.010369600355625152
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,8,1,64,63,0.010532800108194351
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,1,63,0.010441599786281586
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,2,63,0.010428799688816071
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,2,127,0.012404800206422806
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,4,63,0.01056319996714592
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,8,63,0.010375999659299851
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,16,63,0.010496000200510025
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,32,63,0.010411199927330018
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,8,1,64,63,0.010444799810647965
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,1,127,0.012676799297332763
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,2,127,0.010428799688816071
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,8,127,0.010419200360774993
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,4,127,0.010476800054311753
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,16,127,0.01037440001964569
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,32,127,0.010523200035095215
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,8,1,64,127,0.010393600165843963
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,1,127,0.010679999738931656
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,4,127,0.010567999631166457
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,16,127,0.010384000092744827
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,8,127,0.010414399951696397
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,32,127,0.010387200117111205
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,8,1,64,127,0.010471999645233154
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,1,255,0.012439999729394913
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,2,255,0.010435199737548828
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,4,255,0.010920000076293946
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,8,255,0.010414399951696397
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,16,255,0.01048320010304451
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,32,255,0.01040000021457672
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,8,1,64,255,0.010543999820947647
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,2,255,0.0104592002928257
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,4,255,0.010467199981212616
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,8,255,0.010473600029945374
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,1,255,0.010691200196743012
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,16,255,0.010390400141477584
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,32,255,0.01043360009789467
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,16,511,0.012417600303888322
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,8,1,64,255,0.01045759990811348
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,1,511,0.014556799829006196
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,2,511,0.014217600226402283
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,4,511,0.01242239996790886
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,8,511,0.012481600046157837
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,32,511,0.012444800138473511
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,16,511,0.010505600273609162
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,8,1,64,511,0.012455999851226807
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,1,511,0.01244800016283989
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,2,511,0.01257600039243698
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,4,511,0.010475199669599533
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,8,511,0.010411199927330018
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,32,511,0.01048479974269867
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,16,1023,0.01356479972600937
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,8,1,64,511,0.010524799674749374
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,1,1023,0.016622400283813475
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,2,1023,0.01451520025730133
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,4,1023,0.014614400267601014
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,8,1023,0.012564800679683685
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,32,1023,0.012750400602817536
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,8,1,64,1023,0.012513600289821625
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,1,1023,0.014500799775123595
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,2,1023,0.012544000148773193
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,4,1023,0.012531200051307678
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,8,1023,0.01234079971909523
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,16,1023,0.01144160032272339
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,32,1023,0.011220800131559372
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,8,1,64,1023,0.012454400211572647
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,1,2047,0.020708799362182617
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,2,2047,0.01666239947080612
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,4,2047,0.016102400422096253
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,8,2047,0.015422399342060088
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,16,2047,0.014710399508476257
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,32,2047,0.014606399834156037
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,8,1,64,2047,0.014628799259662628
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,1,2047,0.016663999855518342
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,2,2047,0.014499199390411378
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,4,2047,0.01451680064201355
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,8,2047,0.013742400705814362
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,16,2047,0.012627199292182922
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,32,2047,0.012604799866676331
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,32,4095,0.01653279960155487
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,8,1,64,2047,0.01247519999742508
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,2,4095,0.02279040068387985
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,4,4095,0.018739199638366698
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,1,4095,0.02903839945793152
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,8,4095,0.016867199540138246
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,16,4095,0.017852799594402315
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,8,1,64,4095,0.01661919951438904
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,1,4095,0.020611199736595153
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,2,4095,0.018648000061511995
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,4,4095,0.016329599916934966
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,8,4095,0.014542399346828461
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,16,4095,0.014822399616241455
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,32,4095,0.014596800506114959
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,8,1,64,4095,0.01456640064716339
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,1,8191,0.02890399992465973
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,2,8191,0.03308959901332855
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,4,8191,0.023280000686645506
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,8,8191,0.022779199481010436
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,16,8191,0.022745600342750548
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,32,8191,0.022779199481010436
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,8,1,64,8191,0.021692800521850585
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,1,8191,0.02492479979991913
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,2,8191,0.022758400440216063
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,4,8191,0.018692800402641298
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,8,8191,0.018619200587272643
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,16,8191,0.01876319944858551
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,32,8191,0.01680160015821457
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,8,1,64,8191,0.018590399622917177
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,1,16383,0.05114240050315857
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,2,16383,0.05663999915122986
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,4,16383,0.04661119878292084
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,8,16383,0.04549440145492554
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,16,16383,0.04530560076236725
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,32,16383,0.04495519995689392
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,8,1,64,16383,0.045256000757217404
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,1,16383,0.030726400017738343
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,2,16383,0.03292959928512573
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,4,16383,0.022759999334812164
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,8,16383,0.02271360009908676
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,16,16383,0.022756800055503845
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,32,16383,0.02279199957847595
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,1,32767,0.07979199886322022
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,8,1,64,16383,0.022380800545215608
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,2,32767,0.09385600090026855
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,4,32767,0.07316799759864807
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,8,32767,0.07144799828529358
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,16,32767,0.07234560251235962
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,32,32767,0.07100319862365723
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,8,1,64,32767,0.07080479860305786
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,2,32767,0.05095999836921692
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,4,32767,0.04118880033493042
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,1,32767,0.046463999152183535
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,8,32767,0.03996320068836212
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,16,32767,0.04038879871368408
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,32,32767,0.03990240097045898
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,8,1,64,32767,0.0395823985338211
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,1,65535,0.13280160427093507
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,2,65535,0.16311839818954468
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,8,1,64,65535,0.12258720397949219
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,8,65535,0.12438720464706421
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,4,65535,0.12789759635925294
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,32,65535,0.1274448037147522
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,16,65535,0.063128000497818
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,1,65535,0.06980000138282776
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,16,65535,0.12814879417419434
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,2,65535,0.08379679918289185
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,4,65535,0.06392160058021545
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,8,65535,0.06251680254936218
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,32,65535,0.06383839845657349
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,8,1,64,65535,0.0618399977684021
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,1,131071,0.24972960948944092
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,16,131071,0.2221760034561157
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,2,131071,0.3040112018585205
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,8,131071,0.22783200740814208
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,4,131071,0.22546401023864746
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,32,131071,0.2261199951171875
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,8,131071,0.1050976037979126
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,8,1,64,131071,0.22470400333404542
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,1,131071,0.11985280513763427
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,2,131071,0.14868639707565307
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,4,131071,0.10730079412460328
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,16,131071,0.10565119981765747
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,1,1,0.012585599720478059
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,8,1,64,131071,0.10460159778594971
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,2,1,0.012627199292182922
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,32,131071,0.10468800067901611
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,4,1,0.012451200187206269
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,8,1,0.010518400371074677
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,4,1,0.010486400127410889
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,16,1,0.011523199826478958
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,32,1,0.010592000186443329
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,16,1,64,1,0.010972800105810166
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,1,3,0.012484800070524216
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,1,1,0.012520000338554382
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,2,3,0.01257600039243698
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,2,1,0.011913599818944931
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,8,1,0.0104032002389431
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,16,1,0.010427200049161912
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,32,1,0.010393600165843963
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,16,1,64,1,0.010460799932479859
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,4,3,0.012479999661445617
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,8,3,0.010462400317192078
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,16,3,0.01053759977221489
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,32,3,0.010494399815797806
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,16,1,64,3,0.010611200332641601
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,1,3,0.012535999715328216
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,2,3,0.011751999706029892
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,4,3,0.010460799932479859
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,8,3,0.010380800068378448
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,16,3,0.01048479974269867
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,32,3,0.01048479974269867
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,16,1,64,3,0.01040000021457672
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,1,7,0.012470400333404541
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,2,7,0.012585599720478059
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,2,7,0.01050880029797554
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,4,7,0.010419200360774993
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,8,7,0.010582400113344192
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,16,7,0.010620799660682679
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,32,7,0.010409600287675857
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,32,7,0.010424000024795533
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,16,1,64,7,0.010416000336408614
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,1,7,0.0124208003282547
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,4,7,0.010542400181293488
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,8,7,0.010411199927330018
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,16,7,0.010364799946546554
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,16,1,64,7,0.010422399640083313
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,1,15,0.01244800016283989
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,2,15,0.012555199861526489
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,4,15,0.01244800016283989
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,8,15,0.010496000200510025
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,16,15,0.010523200035095215
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,32,15,0.0104592002928257
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,16,1,64,15,0.010412800312042236
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,1,15,0.01250240057706833
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,2,15,0.010576000064611435
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,4,15,0.010505600273609162
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,8,15,0.010411199927330018
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,16,15,0.010494399815797806
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,32,15,0.010380800068378448
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,16,1,64,15,0.01055999994277954
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,1,31,0.012683199346065521
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,4,31,0.010555200278759003
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,8,31,0.011406400054693223
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,16,31,0.010491199791431427
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,2,31,0.012537600100040435
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,32,31,0.01048320010304451
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,16,1,64,31,0.010406400263309478
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,1,31,0.0124719999730587
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,2,31,0.01071999967098236
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,4,31,0.010595200210809707
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,8,31,0.010492800176143647
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,16,31,0.010377600044012069
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,32,31,0.010395199805498124
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,16,1,64,31,0.010372799634933472
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,2,63,0.012667199969291687
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,1,63,0.012577599287033081
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,4,63,0.012455999851226807
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,8,63,0.01048479974269867
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,16,63,0.010503999888896942
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,32,63,0.01109279990196228
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,16,1,64,63,0.010480000078678131
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,1,63,0.012438400089740754
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,2,63,0.010451199859380722
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,8,63,0.01043360009789467
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,16,63,0.0103472001850605
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,8,127,0.011230400204658509
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,4,63,0.01056160032749176
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,32,63,0.010503999888896942
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,16,1,64,127,0.010539200156927109
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,16,1,64,63,0.010487999767065048
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,1,127,0.012161599844694138
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,1,127,0.013177600502967835
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,2,127,0.012481600046157837
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,4,127,0.011585599929094314
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,16,127,0.012140800058841706
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,32,127,0.010691200196743012
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,1,255,0.012630400061607362
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,2,127,0.010628800094127654
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,4,127,0.010462400317192078
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,8,255,0.010665600001811982
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,8,127,0.010390400141477584
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,16,127,0.010515200346708298
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,32,127,0.010414399951696397
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,16,1,64,127,0.010497599840164185
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,2,255,0.012439999729394913
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,4,255,0.012454400211572647
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,16,255,0.011281599849462509
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,32,255,0.010496000200510025
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,16,1,64,255,0.010384000092744827
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,1,255,0.01241919994354248
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,2,255,0.010440000146627427
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,4,255,0.01050880029797554
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,8,255,0.010468800365924836
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,16,255,0.010529600083827972
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,32,255,0.010480000078678131
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,16,1,64,255,0.0104032002389431
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,1,511,0.016596800088882445
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,2,511,0.014572800695896148
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,4,511,0.013902400434017182
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,8,511,0.012615999579429627
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,16,511,0.012563200294971466
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,32,511,0.012470400333404541
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,16,1,64,511,0.012494400143623352
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,1,511,0.014531199634075165
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,2,511,0.012630400061607362
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,4,511,0.012468799948692322
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,8,511,0.010606399923563003
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,16,511,0.010707200318574906
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,16,1,64,511,0.011059200018644333
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,1,1023,0.020627200603485107
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,32,511,0.010894399881362916
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,2,1023,0.016638399660587312
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,4,1023,0.014606399834156037
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,8,1023,0.014580799639225006
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,16,1023,0.014526399970054626
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,32,1023,0.014711999893188476
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,16,1,64,1023,0.014446400105953217
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,1,1023,0.016638399660587312
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,2,1023,0.014494399726390838
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,4,1023,0.012692800164222718
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,8,1023,0.012417600303888322
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,16,1023,0.012411200255155564
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,32,1023,0.012408000230789185
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,16,1,64,1023,0.012465599924325943
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,1,2047,0.026814401149749756
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,2,2047,0.020716799795627593
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,4,2047,0.01671999990940094
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,8,2047,0.016543999314308167
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,16,2047,0.016582399606704712
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,32,2047,0.01655679941177368
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,16,1,64,2047,0.015423999726772308
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,1,2047,0.020721599459648132
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,2,2047,0.01674720048904419
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,4,2047,0.014569599926471711
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,8,2047,0.014510400593280792
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,4,4095,0.022793599963188173
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,16,2047,0.01451520025730133
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,32,2047,0.012545600533485413
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,16,1,64,2047,0.013424000144004822
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,1,4095,0.02956640124320984
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,2,4095,0.03293280005455017
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,8,4095,0.021049599349498748
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,16,4095,0.020902399718761445
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,32,4095,0.020755200088024138
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,16,1,64,4095,0.020660799741744996
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,1,4095,0.022865599393844603
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,2,4095,0.020664000511169435
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,4,4095,0.01855839937925339
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,8,4095,0.01666560024023056
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,16,4095,0.016726399958133697
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,32,4095,0.01675039976835251
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,16,1,64,4095,0.016711999475955964
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,1,8191,0.05041599869728088
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,2,8191,0.05453280210494995
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,4,8191,0.045228800177574156
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,8,8191,0.04520640075206757
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,16,8191,0.044737601280212404
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,32,8191,0.045238399505615236
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,16,1,64,8191,0.04325920045375824
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,1,8191,0.02894560098648071
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,2,8191,0.029840001463890077
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,4,8191,0.022726400196552275
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,8,8191,0.021931199729442595
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,16,8191,0.021451200544834136
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,8,16383,0.07110239863395691
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,32,8191,0.020777599513530733
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,16,1,64,8191,0.020694400370121
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,2,16383,0.09428640007972718
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,1,16383,0.04572319984436035
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,4,16383,0.07224159836769103
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,1,16383,0.07631040215492249
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,16,16383,0.07129600048065185
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,32,16383,0.07149760127067566
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,16,1,64,16383,0.07008159756660462
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,2,16383,0.04992319941520691
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,4,16383,0.039575999975204466
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,8,16383,0.0390720009803772
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,16,16383,0.03914560079574585
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,16,1,64,16383,0.03877280056476593
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,1,32767,0.13165600299835206
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,32,16383,0.03868800103664398
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,2,32767,0.17017920017242433
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,4,32767,0.12831679582595826
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,8,32767,0.1258895993232727
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,16,32767,0.13038719892501832
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,8,32767,0.061294400691986085
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,32,32767,0.1255519986152649
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,16,1,64,32767,0.12316800355911255
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,1,32767,0.06952800154685974
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,2,32767,0.08526719808578491
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,4,32767,0.06176480054855347
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,16,32767,0.06156799793243408
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,32,32767,0.06016160249710083
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,16,1,64,32767,0.06021760106086731
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,1,65535,0.23724958896636963
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,2,65535,0.32701759338378905
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,4,65535,0.22747039794921875
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,8,65535,0.22575678825378417
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,16,65535,0.23117759227752685
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,4,65535,0.10518079996109009
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,32,65535,0.22841439247131348
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,16,1,64,65535,0.2268224000930786
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,1,65535,0.118995201587677
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,16,1,64,65535,0.1041759967803955
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,8,65535,0.10454399585723877
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,2,65535,0.15662399530410767
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,16,65535,0.10509920120239258
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,32,65535,0.10452959537506104
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,1,131071,0.4606448173522949
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,4,131071,0.42798237800598143
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,2,131071,0.6407055854797363
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,8,131071,0.42813758850097655
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,16,131071,0.4269536018371582
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,32,131071,0.4334559917449951
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,1,131071,0.22596640586853028
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,16,1,64,131071,0.4239823818206787
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,4,131071,0.19027680158615112
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,2,131071,0.3008239984512329
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,32,131071,0.18862080574035645
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,8,131071,0.18889280557632446
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,16,131071,0.18863359689712525
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,1,1,0.014499199390411378
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,16,1,64,131071,0.1876368045806885
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,2,1,0.012544000148773193
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,4,1,0.0125231996178627
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,2,1,0.012444800138473511
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,4,1,0.01244639977812767
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,8,1,0.012539200484752655
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,16,1,0.012454400211572647
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,32,1,0.012438400089740754
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,1,3,0.014547200500965118
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,32,1,64,1,0.012460800260305405
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,1,1,0.01449279934167862
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,8,1,0.010979200154542923
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,16,1,0.010435199737548828
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,32,1,0.010471999645233154
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,32,1,64,1,0.0122079998254776
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,32,1,64,3,0.012411200255155564
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,2,3,0.012742400169372559
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,4,3,0.01250240057706833
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,8,3,0.012427199631929398
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,16,3,0.012510399520397186
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,32,3,0.012479999661445617
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,1,3,0.012590399384498597
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,2,3,0.012489599734544754
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,4,3,0.01096000000834465
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,2,7,0.012619200348854064
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,8,3,0.010487999767065048
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,16,3,0.011095999926328658
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,32,3,0.010579200088977813
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,32,1,64,3,0.010475199669599533
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,1,7,0.014614400267601014
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,4,7,0.01244639977812767
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,8,7,0.012403199821710587
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,16,7,0.012444800138473511
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,32,7,0.012518399953842163
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,16,7,0.010471999645233154
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,32,1,64,7,0.012459199875593185
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,1,7,0.01255040019750595
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,2,7,0.011259199678897857
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,4,7,0.010505600273609162
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,8,7,0.010465600341558457
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,32,7,0.010367999970912933
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,32,1,64,7,0.010390400141477584
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,1,15,0.014572800695896148
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,2,15,0.01284639984369278
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,4,15,0.012531200051307678
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,8,15,0.01244800016283989
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,16,15,0.012427199631929398
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,32,15,0.012464000284671784
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,32,1,64,15,0.012520000338554382
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,1,15,0.012724800407886505
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,2,15,0.012488000094890594
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,4,15,0.01048479974269867
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,16,15,0.010387200117111205
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,8,15,0.010424000024795533
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,32,15,0.010487999767065048
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,32,1,64,15,0.010486400127410889
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,1,31,0.014601600170135499
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,2,31,0.012782399356365205
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,4,31,0.01268640011548996
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,8,31,0.012427199631929398
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,16,31,0.012516799569129943
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,32,31,0.012438400089740754
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,32,31,0.01045600026845932
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,32,1,64,31,0.01241919994354248
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,1,63,0.014727999269962311
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,1,31,0.012542399764060973
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,2,31,0.011609599739313126
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,4,31,0.010596799850463866
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,8,31,0.010470400005578995
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,16,31,0.01048479974269867
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,32,1,64,31,0.010369600355625152
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,2,63,0.012563200294971466
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,4,63,0.012432000041007996
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,8,63,0.012449599802494049
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,16,63,0.01265760064125061
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,32,63,0.012439999729394913
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,32,1,64,63,0.012638400495052337
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,1,63,0.012483199685811996
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,2,63,0.012513600289821625
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,4,63,0.010505600273609162
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,8,63,0.010411199927330018
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,8,127,0.012539200484752655
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,16,63,0.010460799932479859
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,32,127,0.012438400089740754
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,32,63,0.010465600341558457
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,32,1,64,63,0.010471999645233154
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,1,127,0.014558400213718414
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,2,127,0.01398719996213913
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,4,127,0.012464000284671784
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,16,127,0.010686399787664414
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,16,127,0.012468799948692322
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,32,1,64,127,0.012783999741077422
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,1,127,0.01249919980764389
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,2,127,0.012534399330615998
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,4,127,0.010835199803113937
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,8,127,0.010463999956846238
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,32,127,0.010463999956846238
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,32,1,64,127,0.010416000336408614
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,1,255,0.014694400131702423
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,2,255,0.012516799569129943
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,4,255,0.012540799379348756
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,8,255,0.012535999715328216
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,16,255,0.012539200484752655
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,32,255,0.012521600723266602
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,32,255,0.010592000186443329
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,32,1,64,255,0.012548799812793731
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,1,255,0.012567999958992004
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,2,255,0.011112000048160552
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,4,255,0.010499200224876404
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,8,255,0.010460799932479859
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,16,255,0.010518400371074677
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,1,511,0.01870719939470291
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,32,1,64,255,0.010579200088977813
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,2,511,0.016527999937534333
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,4,511,0.01459839940071106
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,8,511,0.014420799911022186
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,16,511,0.013630400598049163
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,32,511,0.014561599493026734
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,32,1,64,511,0.013633599877357483
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,32,511,0.012379200011491776
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,1,511,0.014601600170135499
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,2,511,0.01449120044708252
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,4,511,0.012577599287033081
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,8,511,0.012491200119256973
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,16,511,0.012428800016641617
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,32,1,64,511,0.01218079999089241
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,1,1023,0.026841598749160766
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,2,1023,0.020715199410915375
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,4,1023,0.016678400337696075
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,8,1023,0.015695999562740325
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,16,1023,0.016582399606704712
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,32,1023,0.016073599457740784
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,32,1,64,1023,0.014609600603580474
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,1,1023,0.01870400011539459
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,2,1023,0.0166143998503685
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,4,1023,0.01443839967250824
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,8,1023,0.01252799928188324
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,16,1023,0.012566399574279786
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,32,1023,0.0124719999730587
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,32,1,64,1023,0.012692800164222718
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,1,2047,0.02757120132446289
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,2,2047,0.03133119940757752
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,4,2047,0.02083680033683777
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,8,2047,0.0204352006316185
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,16,2047,0.020665599405765532
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,32,2047,0.020716799795627593
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,32,1,64,2047,0.019094400107860565
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,32,1,64,2047,0.014582400023937226
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,1,2047,0.02069759964942932
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,2,2047,0.02081120014190674
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,4,2047,0.016604800522327424
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,8,2047,0.014507199823856353
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,16,2047,0.01470080018043518
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,32,2047,0.01459839940071106
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,1,4095,0.04923200011253357
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,2,4095,0.055524802207946776
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,4,4095,0.04314880073070526
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,8,4095,0.04149119853973389
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,16,4095,0.042452800273895266
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,32,4095,0.04140639901161194
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,32,1,64,4095,0.04110240042209625
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,1,4095,0.026867198944091796
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,2,4095,0.02903839945793152
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,4,4095,0.020718400180339814
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,8,4095,0.019833600521087645
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,4,8191,0.07033439874649047
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,16,4095,0.01871200054883957
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,32,4095,0.01900160014629364
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,32,1,64,4095,0.018673600256443025
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,1,8191,0.07609599828720093
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,2,8191,0.09497920274734498
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,8,8191,0.06920639872550964
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,16,8191,0.06876639723777771
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,32,8191,0.06819199919700622
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,32,1,64,8191,0.06694560050964356
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,1,8191,0.045270401239395144
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,2,8191,0.05103359818458557
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,4,8191,0.03781439960002899
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,8,8191,0.03681119978427887
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,2,16383,0.17141439914703369
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,16,8191,0.03701600134372711
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,32,8191,0.03658399879932404
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,32,1,64,8191,0.037011200189590455
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,1,16383,0.12986079454421998
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,4,16383,0.12328959703445434
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,2,16383,0.08596000075340271
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,8,16383,0.12014720439910889
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,16,16383,0.12055679559707641
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,32,16383,0.11950080394744873
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,32,16383,0.05932160019874573
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,32,1,64,16383,0.12222880125045776
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,4,16383,0.05980799794197082
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,1,16383,0.06991360187530518
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,8,16383,0.059539198875427246
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,16,16383,0.059552001953125
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,32,1,64,16383,0.05950559973716736
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,8,32767,0.22531681060791015
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,1,32767,0.24326720237731933
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,2,32767,0.32717280387878417
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,4,32767,0.22942240238189698
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,16,32767,0.22431840896606445
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,32,32767,0.2262511968612671
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,32,1,64,32767,0.22720320224761964
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,1,32767,0.12355200052261353
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,2,32767,0.1561728000640869
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,4,32767,0.10647679567337036
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,8,32767,0.1046239972114563
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,16,32767,0.10490560531616211
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,32,32767,0.10452480316162109
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,32,1,64,32767,0.10481599569320679
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,1,65535,0.4736720085144043
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,4,65535,0.44009599685668943
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,2,65535,0.638043212890625
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,8,65535,0.4387343883514404
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,16,65535,0.4351935863494873
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,32,65535,0.4384160041809082
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,32,1,64,65535,0.43783841133117674
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,1,65535,0.23398079872131347
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,2,65535,0.2996016025543213
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,4,65535,0.19630880355834962
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,8,65535,0.19411200284957886
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,16,65535,0.1949712038040161
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,1,1,0.014480000734329224
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,32,65535,0.19455679655075073
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,2,1,0.014588800072669984
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,4,1,0.012695999443531036
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,32,1,64,65535,0.19458240270614624
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,8,1,0.01252480000257492
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,16,1,0.012465599924325943
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,32,1,0.01255359947681427
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,64,1,64,1,0.012454400211572647
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,1,1,0.012559999525547028
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,2,1,0.012646399438381195
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,4,1,0.012636800110340119
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,8,1,0.01050880029797554
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,16,1,0.012441600114107132
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,32,1,0.010761599987745285
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,64,1,64,1,0.010414399951696397
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,1,3,0.013812799751758576
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,2,3,0.014563199877738953
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,4,3,0.012582400441169738
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,8,3,0.01255040019750595
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,16,3,0.012484800070524216
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,32,3,0.012462399899959564
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,32,3,0.010478399693965912
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,64,1,64,3,0.012452799826860428
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,1,3,0.012622399628162384
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,2,3,0.012588800489902496
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,4,3,0.012411200255155564
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,8,3,0.010516799986362457
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,16,3,0.010704000294208527
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,64,1,64,3,0.01069599986076355
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,1,7,0.014590400457382201
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,2,7,0.01449120044708252
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,4,7,0.012574400007724761
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,8,7,0.012518399953842163
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,16,7,0.012452799826860428
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,32,7,0.01242400035262108
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,64,1,64,7,0.01247360035777092
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,1,7,0.012439999729394913
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,2,7,0.012532800436019897
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,4,7,0.012415999919176102
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,8,7,0.01058719977736473
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,32,7,0.010407999902963639
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,64,1,64,7,0.010595200210809707
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,16,7,0.010609599947929382
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,1,15,0.014364799857139588
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,2,15,0.014608000218868256
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,4,15,0.013116799294948578
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,8,15,0.012479999661445617
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,16,15,0.012582400441169738
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,32,15,0.012427199631929398
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,64,1,64,15,0.012529599666595458
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,1,15,0.012606400251388549
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,2,15,0.01260959953069687
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,4,15,0.011804799735546111
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,8,15,0.010441599786281586
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,16,15,0.010473600029945374
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,32,15,0.010609599947929382
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,64,1,64,15,0.010520000010728836
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,1,31,0.01454080045223236
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,2,31,0.014614400267601014
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,4,31,0.013120000064373017
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,8,31,0.012457600235939026
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,16,31,0.012544000148773193
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,32,31,0.012454400211572647
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,64,1,64,31,0.01289760023355484
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,1,31,0.012435200065374375
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,2,31,0.012539200484752655
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,4,31,0.011638399958610535
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,8,31,0.010700800269842149
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,8,63,0.01250240057706833
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,16,31,0.01048479974269867
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,32,31,0.010580799728631973
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,64,1,64,31,0.010676799714565277
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,1,63,0.01459839940071106
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,2,63,0.012591999769210816
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,2,63,0.014616000652313232
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,8,63,0.010569600015878677
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,4,63,0.01387840062379837
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,16,63,0.01250240057706833
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,32,63,0.012436799705028534
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,64,1,64,63,0.01260959953069687
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,1,63,0.012464000284671784
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,4,127,0.013251200318336487
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,4,63,0.011606399714946748
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,16,63,0.010516799986362457
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,32,63,0.010475199669599533
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,64,1,64,63,0.010596799850463866
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,1,127,0.014595200121402741
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,2,127,0.01459839940071106
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,8,127,0.012531200051307678
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,16,127,0.012649600207805634
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,32,127,0.012556800246238708
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,64,1,64,127,0.01255200058221817
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,32,127,0.010480000078678131
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,1,127,0.012481600046157837
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,2,127,0.012505599856376648
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,4,127,0.012459199875593185
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,8,127,0.010553599894046783
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,8,255,0.012457600235939026
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,16,127,0.010996799916028976
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,64,1,64,127,0.010592000186443329
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,1,255,0.020692799985408784
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,2,255,0.014537599682807923
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,4,255,0.012646399438381195
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,4,255,0.012404800206422806
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,32,255,0.012457600235939026
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,16,255,0.012566399574279786
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,64,1,64,255,0.01255200058221817
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,1,255,0.01858399957418442
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,2,255,0.012548799812793731
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,8,255,0.010531199723482132
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,16,255,0.010523200035095215
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,32,255,0.01045600026845932
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,64,1,64,255,0.010401599854230881
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,1,511,0.022859199345111846
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,2,511,0.01863040030002594
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,4,511,0.016622400283813475
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,8,511,0.014524799585342408
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,16,511,0.01459999978542328
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,32,511,0.014571200311183929
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,64,1,64,511,0.014569599926471711
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,1,511,0.018724800646305086
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,2,511,0.014470399916172027
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,4,511,0.014713600277900696
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,8,511,0.01247360035777092
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,16,511,0.012385600060224534
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,32,511,0.012484800070524216
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,64,1,64,511,0.012428800016641617
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,64,1,64,1023,0.018615999817848207
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,1,1023,0.030155199766159057
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,2,1023,0.03138720095157623
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,4,1023,0.020868800580501556
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,8,1023,0.019592000544071196
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,16,1023,0.018668800592422485
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,32,1023,0.018670399487018586
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,2,1023,0.018771199882030486
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,4,1023,0.016551999747753142
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,1,1023,0.022779199481010436
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,8,1023,0.01451680064201355
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,16,1023,0.014727999269962311
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,32,1023,0.01451839953660965
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,64,1,64,1023,0.014601600170135499
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,1,2047,0.05130239725112915
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,2,2047,0.05544160008430481
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,4,2047,0.04392800033092499
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,8,2047,0.042217600345611575
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,16,2047,0.04284639954566956
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,32,2047,0.043136000633239746
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,64,1,64,2047,0.04131200015544891
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,1,2047,0.02890079915523529
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,2,2047,0.030432000756263733
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,4,2047,0.020751999318599702
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,2,4095,0.093094402551651
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,8,2047,0.018777599930763243
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,16,2047,0.018651199340820313
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,32,2047,0.01857600063085556
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,64,1,64,2047,0.018595199286937713
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,1,4095,0.0783407986164093
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,4,4095,0.0713375985622406
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,8,4095,0.06901119947433472
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,4,4095,0.03909760117530823
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,16,4095,0.06814240217208863
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,16,4095,0.03633440136909485
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,32,4095,0.06879519820213317
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,64,1,64,4095,0.06748800277709961
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,1,4095,0.04724319875240326
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,2,4095,0.05009920001029968
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,8,4095,0.03736799955368042
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,32,4095,0.0366703987121582
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,64,1,64,4095,0.03640480041503906
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,1,8191,0.13167200088500977
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,2,8191,0.17011040449142456
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,4,8191,0.12425119876861572
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,2,8191,0.0854095995426178
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,8,8191,0.12486879825592041
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,16,8191,0.1217952013015747
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,32,8191,0.12280000448226928
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,64,1,64,8191,0.12115679979324341
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,1,8191,0.07287200093269348
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,4,8191,0.061684799194335935
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,8,8191,0.059622400999069215
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,16,8191,0.059683197736740114
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,32,8191,0.059539198875427246
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,64,1,64,8191,0.05952799916267395
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,1,16383,0.2428607940673828
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,4,16383,0.22934401035308838
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,8,16383,0.22658240795135498
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,16,16383,0.2276927947998047
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,2,16383,0.323638391494751
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,32,16383,0.22739040851593018
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,1,16383,0.12542400360107422
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,64,1,64,16383,0.22669761180877684
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,2,16383,0.1549999952316284
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,4,16383,0.10691519975662231
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,8,16383,0.10642880201339722
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,32,16383,0.10491039752960205
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,64,1,64,16383,0.10495519638061523
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,16,16383,0.10479999780654907
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,1,32767,0.4704927921295166
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,2,32767,0.6330175876617432
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,4,32767,0.4399119853973389
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,8,32767,0.43956480026245115
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,16,32767,0.43946080207824706
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,32,32767,0.43872637748718263
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,1,32767,0.23506081104278564
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,64,1,64,32767,0.4360176086425781
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,2,32767,0.29967200756073
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,4,32767,0.19759039878845214
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,1,1,0.016627199947834015
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,8,32767,0.1959887981414795
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,16,32767,0.19593919515609742
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,32,32767,0.19373760223388672
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,2,1,0.020641599595546723
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,64,1,64,32767,0.19457440376281737
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,8,1,0.012548799812793731
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,4,1,0.014718399941921234
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,2,1,0.0165583997964859
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,16,1,0.012539200484752655
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,16,1,0.01247360035777092
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,8,1,0.012491200119256973
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,32,1,0.012636800110340119
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,128,1,64,1,0.012537600100040435
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,1,1,0.016542400419712066
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,4,1,0.01247360035777092
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,32,1,0.011150400340557098
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,128,1,64,1,0.010883200168609618
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,1,3,0.016758400201797485
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,2,3,0.02067199945449829
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,4,3,0.014577600359916686
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,8,3,0.013070400059223174
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,16,3,0.012670400738716125
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,32,3,0.012544000148773193
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,128,1,64,3,0.012664000689983367
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,1,3,0.016748799383640288
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,128,1,64,3,0.010412800312042236
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,4,3,0.012643200159072877
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,8,3,0.012403199821710587
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,2,3,0.01674399971961975
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,16,3,0.010694400221109391
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,32,3,0.01061599999666214
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,2,7,0.020633600652217865
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,1,7,0.016896000504493712
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,4,7,0.014686399698257446
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,8,7,0.014580799639225006
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,16,7,0.013823999464511872
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,32,7,0.01308320015668869
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,32,7,0.010438399761915207
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,128,1,64,7,0.013627199828624726
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,128,1,64,7,0.010513599961996078
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,2,15,0.020747199654579163
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,1,7,0.016572800278663636
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,2,7,0.01706240028142929
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,4,7,0.012577599287033081
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,8,7,0.01244639977812767
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,16,7,0.012481600046157837
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,1,15,0.016604800522327424
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,4,15,0.014692799746990204
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,8,15,0.014505599439144135
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,16,15,0.013568000495433807
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,32,15,0.014497600495815277
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,128,1,64,15,0.014552000164985656
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,128,1,64,15,0.01114400029182434
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,1,15,0.016543999314308167
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,2,15,0.01738079935312271
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,4,31,0.0146479994058609
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,4,15,0.012515200674533844
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,8,15,0.011964800208806992
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,32,31,0.014467200636863709
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,16,15,0.012505599856376648
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,32,15,0.010608000308275222
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,2,31,0.01842080056667328
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,1,31,0.016628800332546233
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,2,31,0.020715199410915375
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,8,31,0.014683200418949128
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,16,31,0.01446239948272705
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,128,1,64,31,0.014203199744224548
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,1,31,0.016595199704170227
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,2,63,0.020660799741744996
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,4,63,0.014528000354766845
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,4,31,0.012593600153923034
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,16,63,0.014480000734329224
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,8,31,0.012596799433231354
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,16,31,0.011825600266456604
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,32,31,0.010542400181293488
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,128,1,64,31,0.010603199899196624
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,1,63,0.016894400119781494
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,8,63,0.014528000354766845
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,32,63,0.01449120044708252
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,128,1,64,63,0.014494399726390838
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,1,63,0.016604800522327424
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,2,63,0.017340800166130065
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,4,63,0.012583999335765839
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,8,63,0.0124719999730587
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,16,63,0.012467200309038163
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,32,63,0.012427199631929398
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,8,127,0.014528000354766845
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,16,127,0.014526399970054626
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,128,1,64,63,0.010686399787664414
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,1,127,0.016732800006866454
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,2,127,0.02091519981622696
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,4,127,0.01449120044708252
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,32,127,0.013760000467300415
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,128,1,64,127,0.01459839940071106
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,1,127,0.016531200706958772
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,2,127,0.016739200055599212
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,4,127,0.012566399574279786
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,8,127,0.01249760016798973
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,4,255,0.014630399644374847
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,16,127,0.010705599933862687
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,8,255,0.014689600467681885
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,32,127,0.010604800283908844
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,32,255,0.014608000218868256
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,128,1,64,127,0.012481600046157837
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,1,255,0.018873600661754607
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,2,255,0.020732800662517547
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,16,255,0.01451359987258911
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,128,1,64,255,0.014569599926471711
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,1,255,0.01834239959716797
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,2,255,0.01720000058412552
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,4,255,0.012580800056457519
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,8,255,0.012415999919176102
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,16,255,0.010500799864530563
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,32,255,0.01079839989542961
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,128,1,64,255,0.012516799569129943
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,1,511,0.028788799047470094
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,2,511,0.032785600423812865
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,4,511,0.020417599380016326
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,4,511,0.014500799775123595
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,8,511,0.018695999681949616
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,16,511,0.01855040043592453
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,32,511,0.016702400147914888
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,128,1,64,511,0.018716800212860107
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,1,511,0.02082560062408447
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,2,511,0.021991999447345735
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,8,511,0.014590400457382201
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,16,511,0.014318400621414184
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,32,511,0.01265760064125061
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,128,1,64,511,0.01435679942369461
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,1,1023,0.05018879771232605
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,128,1,64,1023,0.04088639914989471
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,2,1023,0.05574719905853272
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,4,1023,0.043663999438285826
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,8,1023,0.04145120084285736
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,16,1023,0.04089600145816803
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,32,1023,0.0411296010017395
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,1,1023,0.02874560058116913
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,2,1023,0.03198879957199097
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,4,1023,0.02066880017518997
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,8,1023,0.018377600610256194
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,16,1023,0.017312000691890716
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,32,1023,0.01866080015897751
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,128,1,64,1023,0.01671680063009262
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,1,2047,0.07684800028800964
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,2,2047,0.09611200094223023
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,1,2047,0.04735200107097626
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,4,2047,0.06914560198783874
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,8,2047,0.06795039772987366
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,16,2047,0.06816800236701966
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,32,2047,0.06711680293083191
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,128,1,64,2047,0.06920480132102966
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,2,2047,0.051819199323654176
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,1,4095,0.1299504041671753
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,4,2047,0.03802880048751831
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,8,2047,0.03566400110721588
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,16,2047,0.036508798599243164
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,32,2047,0.0357151985168457
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,128,1,64,2047,0.03517760038375854
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,2,4095,0.17499200105667115
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,1,4095,0.07210400104522705
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,8,4095,0.12080639600753784
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,4,4095,0.12322399616241456
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,16,4095,0.11955039501190186
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,16,4095,0.05877599716186523
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,32,4095,0.11934239864349365
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,32,4095,0.05858880281448364
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,128,1,64,4095,0.11962080001831055
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,2,4095,0.08870400190353393
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,4,4095,0.06087520122528076
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,8,4095,0.059627199172973634
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,4,8191,0.23086719512939452
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,8,8191,0.22746400833129882
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,128,1,64,4095,0.058011198043823244
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,1,8191,0.24160959720611572
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,2,8191,0.3275840044021606
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,16,8191,0.22455201148986817
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,32,8191,0.2256864070892334
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,128,1,64,8191,0.22590239048004152
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,16,8191,0.10453599691390991
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,1,8191,0.12568479776382446
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,2,8191,0.15904959440231323
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,4,8191,0.1060256004333496
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,8,8191,0.10438400506973267
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,32,8191,0.10491199493408203
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,128,1,64,8191,0.10377440452575684
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,1,16383,0.47202720642089846
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,4,16383,0.44228639602661135
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,2,16383,0.6390128135681152
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,8,16383,0.433516788482666
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,16,16383,0.43606081008911135
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,32,16383,0.43927998542785646
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,1,16383,0.234716796875
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,4,16383,0.19555360078811646
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,128,1,64,16383,0.43690080642700196
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,2,16383,0.30254559516906737
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,8,16383,0.19420000314712524
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,16,16383,0.19370880126953124
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,128,1,64,16383,0.19465279579162598
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,1,1,0.02486719936132431
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,2,1,0.03249439895153046
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,4,1,0.02072319984436035
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,8,1,0.018796800076961516
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,32,16383,0.19440000057220458
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,16,1,0.018854400515556334
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,32,1,0.01868479996919632
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,8,1,0.0165583997964859
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,256,1,64,1,0.018676799535751343
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,1,1,0.02486239969730377
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,2,1,0.026788800954818726
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,4,1,0.017862400412559508
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,16,1,0.016417600214481354
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,32,1,0.014689600467681885
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,256,1,64,1,0.015457600355148315
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,1,3,0.025472000241279602
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,2,3,0.03255040049552917
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,4,3,0.0206496000289917
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,8,3,0.019424000382423402
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,16,3,0.019094400107860565
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,32,3,0.018654400110244752
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,256,1,64,3,0.019089600443840025
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,1,3,0.024699200689792634
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,2,3,0.026812800765037538
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,4,3,0.01711679995059967
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,8,3,0.01656319946050644
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,8,7,0.018662400543689728
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,16,3,0.016513599455356597
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,32,3,0.016603200137615202
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,256,1,64,3,0.01557600051164627
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,1,7,0.025080001354217528
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,2,7,0.032902398705482484
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,4,7,0.02065120041370392
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,16,7,0.018807999789714813
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,32,7,0.018699200451374055
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,32,7,0.015188799798488617
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,256,1,64,7,0.01892320066690445
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,1,7,0.02483679950237274
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,2,7,0.026766398549079896
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,4,7,0.0186256006360054
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,8,7,0.016599999368190767
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,16,7,0.014903999865055084
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,256,1,64,7,0.014508800208568573
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,1,15,0.02542240023612976
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,2,15,0.0331824004650116
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,4,15,0.021583999693393707
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,8,15,0.019019199907779692
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,16,15,0.020059199631214143
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,32,15,0.019115200638771056
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,256,1,64,15,0.018771199882030486
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,1,15,0.02476480007171631
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,2,15,0.026875200867652892
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,4,15,0.018590399622917177
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,8,15,0.016697600483894348
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,16,15,0.015304000675678253
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,32,15,0.016527999937534333
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,256,1,64,15,0.015105600655078887
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,1,31,0.02653760015964508
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,2,31,0.03301759958267212
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,4,31,0.02070239931344986
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,8,31,0.021054400503635405
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,16,31,0.02003999948501587
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,32,31,0.019089600443840025
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,256,1,64,31,0.020632000267505647
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,1,31,0.02483839988708496
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,2,31,0.026681599020957947
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,4,31,0.017635199427604675
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,8,31,0.01641920059919357
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,8,63,0.01865279972553253
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,16,31,0.016446399688720702
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,32,31,0.015516799688339234
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,256,1,64,31,0.014705599844455719
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,1,63,0.026807999610900878
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,2,63,0.032876798510551454
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,4,63,0.020729599893093108
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,32,63,0.01870719939470291
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,16,63,0.020606400072574617
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,256,1,64,63,0.020776000618934632
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,1,63,0.02486719936132431
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,2,63,0.026494398713111877
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,4,63,0.018611200153827667
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,8,63,0.01635040044784546
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,16,63,0.014892800152301789
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,32,63,0.014575999975204468
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,256,1,64,63,0.015515199303627015
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,32,127,0.01924320012331009
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,1,127,0.02707839906215668
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,2,127,0.033102399110794066
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,4,127,0.020787200331687926
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,8,127,0.018904000520706177
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,16,127,0.01880960017442703
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,16,127,0.015803200006484986
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,256,1,64,127,0.018665599822998046
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,1,127,0.024715200066566467
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,2,127,0.02683520019054413
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,4,127,0.01866399943828583
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,8,127,0.016531200706958772
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,32,127,0.01451359987258911
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,256,1,64,127,0.016174399852752687
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,32,255,0.019315199553966524
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,1,255,0.03835839927196503
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,2,255,0.039284801483154295
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,4,255,0.02205599993467331
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,8,255,0.020603199303150178
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,16,255,0.01977279931306839
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,256,1,64,255,0.01879040002822876
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,1,255,0.028830400109291075
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,2,255,0.026873600482940675
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,4,255,0.017526400089263917
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,8,255,0.016547200083732606
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,16,255,0.0165583997964859
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,32,255,0.015110400319099427
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,256,1,64,255,0.016047999262809753
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,1,511,0.05540800094604492
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,2,511,0.0602512001991272
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,4,511,0.043966400623321536
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,8,511,0.04135839939117432
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,16,511,0.04117920100688934
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,32,511,0.04131839871406555
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,256,1,64,511,0.04110240042209625
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,1,511,0.03999359905719757
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,2,511,0.03898400068283081
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,1,1023,0.08496959805488587
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,4,511,0.022649599611759184
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,8,511,0.020716799795627593
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,16,511,0.020732800662517547
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,16,1023,0.06774399876594543
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,32,511,0.020660799741744996
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,256,1,64,511,0.01921440064907074
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,2,1023,0.1007472038269043
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,4,1023,0.07116320133209228
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,8,1023,0.06904320120811462
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,32,1023,0.06892160177230836
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,256,1,64,1023,0.0666815996170044
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,1,1023,0.05500800013542175
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,2,1023,0.05770720243453979
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,4,1023,0.039182400703430174
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,8,1023,0.03744480013847351
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,16,1023,0.037067198753356935
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,32,1023,0.03692319989204407
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,256,1,64,1023,0.03637920022010803
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,1,2047,0.14058719873428344
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,32,2047,0.11924639940261841
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,2,2047,0.17895840406417846
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,2,2047,0.09362879991531373
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,4,2047,0.06525599956512451
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,4,2047,0.12418880462646484
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,8,2047,0.12090400457382203
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,16,2047,0.12074719667434693
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,256,1,64,2047,0.11951680183410644
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,1,2047,0.08249599933624267
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,8,2047,0.061582398414611814
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,16,2047,0.06117759943008423
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,32,2047,0.06062560081481934
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,256,1,64,2047,0.059880000352859494
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,1,4095,0.25125761032104493
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,2,4095,0.3334160089492798
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,4,4095,0.23080000877380372
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,256,1,64,4095,0.22352960109710693
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,8,4095,0.22720000743865967
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,16,4095,0.2248528003692627
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,32,4095,0.22528960704803466
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,1,4095,0.13548959493637086
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,2,4095,0.16477279663085936
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,256,1,64,4095,0.10589920282363892
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,4,4095,0.11081119775772094
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,8,4095,0.10729279518127441
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,16,4095,0.10671679973602295
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,32,4095,0.10602240562438965
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,1,8191,0.477188777923584
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,4,8191,0.43772640228271487
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,8,8191,0.4372079849243164
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,2,8191,0.6393311977386474
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,16,8191,0.43648638725280764
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,32,8191,0.43808159828186033
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,1,8191,0.2479856014251709
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,256,1,64,8191,0.43412318229675295
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,2,8191,0.30687680244445803
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,4,8191,0.20048320293426514
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,1,1,0.045256000757217404
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,8,8191,0.19723520278930665
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,16,8191,0.19714560508728027
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,32,8191,0.19634079933166504
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,2,1,0.05360159873962402
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,256,1,64,8191,0.1961567997932434
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,16,1,0.02884800136089325
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,8,1,0.028860801458358766
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,4,1,0.033092799782752993
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,32,1,0.029108801484107973
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,512,1,64,1,0.02884959876537323
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,1,1,0.04110879898071289
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,4,1,0.026796799898147584
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,2,1,0.04326559901237488
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,8,1,0.022750400006771088
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,16,1,0.022750400006771088
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,32,1,0.022801600396633148
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,512,1,64,1,0.02266719937324524
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,16,3,0.029124799370765685
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,1,3,0.04676479995250702
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,2,3,0.05358399748802185
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,4,3,0.03308799862861633
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,8,3,0.028880000114440918
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,32,3,0.028883200883865357
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,512,1,64,3,0.028863999247550964
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,1,3,0.04115839898586273
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,2,3,0.04359680116176605
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,4,3,0.026867198944091796
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,8,3,0.02290399968624115
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,16,3,0.022808000445365906
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,32,3,0.022767999768257143
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,512,1,64,3,0.02215680032968521
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,1,7,0.04733439981937408
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,2,7,0.0540448009967804
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,4,7,0.03383040130138397
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,8,7,0.029142400622367857
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,16,7,0.02951200008392334
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,32,7,0.03046720027923584
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,512,1,64,7,0.028886398673057555
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,2,7,0.043931201100349426
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,1,7,0.04121760129928589
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,4,7,0.02696320116519928
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,8,7,0.022724799811840057
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,16,7,0.02268480062484741
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,32,7,0.022758400440216063
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,512,1,64,7,0.02279520034790039
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,1,15,0.04737760126590729
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,2,15,0.054199999570846556
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,4,15,0.033292800188064575
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,8,15,0.03049600124359131
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,8,15,0.02300799936056137
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,16,15,0.03043360114097595
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,32,15,0.02935360074043274
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,512,1,64,15,0.029342401027679443
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,1,15,0.0421968013048172
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,2,15,0.0436928004026413
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,4,15,0.026910400390625
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,16,15,0.022716799378395082
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,32,15,0.022697600722312927
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,512,1,64,15,0.02264000028371811
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,1,31,0.047284799814224246
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,2,31,0.0541055977344513
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,1,31,0.04264320135116577
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,4,31,0.034980800747871396
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,8,31,0.02940639853477478
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,16,31,0.030671998858451843
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,32,31,0.030875200033187868
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,512,1,64,31,0.029627200961112977
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,2,31,0.04320000112056732
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,32,31,0.022388799488544463
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,4,31,0.026787200570106508
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,8,63,0.02948000133037567
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,8,31,0.022763200104236603
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,16,31,0.022761599719524385
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,512,1,64,31,0.02287199944257736
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,1,63,0.04817759990692139
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,2,63,0.05570560097694397
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,4,63,0.033287999033927915
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,16,63,0.029256001114845276
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,32,63,0.029422399401664735
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,512,1,64,63,0.029105600714683533
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,1,63,0.04327360093593598
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,2,63,0.0443807989358902
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,4,63,0.027088001370429993
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,8,63,0.02285439968109131
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,16,63,0.022806400060653688
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,32,63,0.022694399952888487
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,512,1,64,63,0.02157759964466095
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,32,127,0.029531198740005492
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,1,127,0.05091999769210816
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,2,127,0.06093599796295166
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,4,127,0.037555199861526486
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,8,127,0.03126879930496216
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,16,127,0.030379199981689455
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,512,1,64,127,0.029145601391792297
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,1,127,0.04532000124454498
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,2,127,0.045212799310684205
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,4,127,0.026782399415969847
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,8,127,0.022808000445365906
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,16,127,0.02282080054283142
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,32,127,0.022700800001621245
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,512,1,64,127,0.02269120067358017
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,32,255,0.04222080111503601
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,1,255,0.06620960235595703
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,2,255,0.06599199771881104
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,4,255,0.049830400943756105
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,8,255,0.043644800782203674
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,16,255,0.042320001125335696
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,512,1,64,255,0.041119998693466185
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,1,255,0.05374079942703247
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,2,255,0.04800960123538971
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,4,255,0.029121598601341246
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,8,255,0.02268799990415573
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,16,255,0.022777600586414336
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,32,255,0.02290080040693283
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,512,1,64,255,0.02272160053253174
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,1,511,0.09557279944419861
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,2,511,0.10160000324249267
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,4,511,0.0781167984008789
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,8,511,0.07294560074806214
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,16,511,0.07145280241966248
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,32,511,0.06987680196762085
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,512,1,64,511,0.06918560266494751
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,1,511,0.06748800277709961
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,2,511,0.06782559752464294
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,4,511,0.045097601413726804
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,8,511,0.039478400349617006
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,16,511,0.03906719982624054
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,32,511,0.039166399836540224
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,512,1,64,511,0.03825919926166534
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,1,1023,0.15234559774398804
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,2,1023,0.1720944046974182
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,4,1023,0.1287152051925659
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,8,1023,0.12220640182495117
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,4,1023,0.0690991997718811
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,8,1023,0.06374719738960266
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,16,1023,0.12252320051193237
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,32,1023,0.1227952003479004
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,512,1,64,1023,0.12158880233764649
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,1,1023,0.09479039907455444
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,2,1023,0.09936000108718872
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,16,1023,0.061844801902770995
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,2,2047,0.3160752058029175
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,32,1023,0.061667197942733766
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,512,1,64,1023,0.06155520081520081
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,16,2047,0.22602241039276122
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,1,2047,0.2646559953689575
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,4,2047,0.2335360050201416
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,8,2047,0.22560160160064696
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,4,2047,0.11672799587249756
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,32,2047,0.2226128101348877
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,512,1,64,2047,0.22344319820404052
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,1,2047,0.14968160390853882
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,512,1,64,2047,0.10821919441223145
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,2,2047,0.16365599632263184
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,8,2047,0.11109600067138672
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,16,2047,0.10908160209655762
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,32,2047,0.10833920240402221
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,1,4095,0.49186081886291505
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,2,4095,0.5906896114349365
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,4,4095,0.44304962158203126
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,8,4095,0.43432159423828126
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,16,4095,0.4334879875183105
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,32,4095,0.43167839050292967
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,1,4095,0.26294400691986086
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,2,4095,0.29624960422515867
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,512,1,64,4095,0.43197121620178225
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,4,4095,0.20933120250701903
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,8,4095,0.20276799201965331
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1024,1,1,1,0.0824783980846405
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,16,4095,0.2009360074996948
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1024,1,32,1,0.048065599799156186
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1024,1,2,1,0.10165120363235473
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,32,4095,0.1999951958656311
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,1,1,0.07603039741516113
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1024,1,4,1,0.05589439868927002
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1024,1,8,1,0.04937599897384644
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,4,1,0.04530879855155945
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,512,1,64,4095,0.19907360076904296
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1024,1,16,1,0.04748319983482361
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1024,1,64,1,0.04890719950199127
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,2,1,0.07890080213546753
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,8,1,0.03715839982032776
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,16,1,0.035836800932884216
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,32,1,0.03506399989128113
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1024,1,64,1,0.035257598757743834
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1024,1,1,3,0.08217120170593262
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1024,1,2,3,0.10098559856414795
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,1,3,0.0762336015701294
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,2,3,0.07871840000152588
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1024,1,4,3,0.055801600217819214
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1024,1,8,3,0.04943200051784515
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1024,1,16,3,0.04896000027656555
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1024,1,32,3,0.048728001117706296
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1024,1,64,3,0.04737760126590729
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,4,3,0.04525279998779297
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1024,1,2,7,0.10341919660568237
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,8,3,0.03726080060005188
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,16,3,0.03511840105056763
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,32,3,0.03507040143013
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1024,1,64,3,0.034944000840187076
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1024,1,1,7,0.08262720108032226
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1024,1,4,7,0.05563039779663086
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1024,1,8,7,0.04938560128211975
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1024,1,16,7,0.04912480115890503
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1024,1,32,7,0.048172798752784726
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1024,1,64,7,0.04750719964504242
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,1,7,0.07601760029792785
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,2,7,0.07881280183792114
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,4,7,0.0452239990234375
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,8,7,0.03701919913291931
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1024,1,2,15,0.1025696039199829
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,16,7,0.03539359867572785
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1024,1,16,15,0.04908480048179627
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,32,7,0.03501279950141907
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1024,1,64,7,0.03507519960403442
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1024,1,1,15,0.08226400017738342
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1024,1,4,15,0.056302398443222046
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1024,1,8,15,0.04947519898414612
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1024,1,32,15,0.04941120147705078
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1024,1,64,15,0.04890719950199127
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,1,15,0.07635200023651123
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,2,15,0.08012160062789916
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,4,15,0.04533120095729828
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,8,15,0.037124800682067874
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,16,15,0.035257598757743834
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,32,15,0.03503519892692566
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1024,1,64,15,0.035175999999046324
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1024,1,1,31,0.08228480219841003
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1024,1,2,31,0.10356639623641968
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1024,1,4,31,0.05931839942932129
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,2,31,0.081523197889328
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1024,1,8,31,0.05012480020523071
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1024,1,16,31,0.049167999625205995
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1024,1,32,31,0.048921599984169006
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1024,1,64,31,0.049414399266242984
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,1,31,0.07727519869804382
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,4,31,0.04532960057258606
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1024,1,2,63,0.10609439611434937
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,8,31,0.0372624009847641
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,16,31,0.03601920008659363
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,32,31,0.03524320125579834
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1024,1,64,31,0.035041600465774536
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1024,1,1,63,0.08292319774627685
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1024,1,4,63,0.06201440095901489
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1024,1,8,63,0.0513264000415802
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1024,1,16,63,0.04975999891757965
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1024,1,32,63,0.04946880042552948
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1024,1,64,63,0.04889920055866241
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,1,63,0.07803360223770142
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,2,63,0.0824512004852295
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,4,63,0.04542239904403687
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,8,63,0.03705280125141144
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,16,63,0.03509120047092438
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,32,63,0.035420799255371095
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1024,1,64,63,0.03499200046062469
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1024,1,1,127,0.0880895972251892
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1024,1,2,127,0.10807839632034302
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1024,1,4,127,0.06986560225486756
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1024,1,8,127,0.057494401931762695
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1024,1,16,127,0.055343997478485105
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1024,1,32,127,0.05397760272026062
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1024,1,64,127,0.05447999835014343
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,1,127,0.07847520112991332
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,2,127,0.08376960158348083
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,4,127,0.05084480047225952
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,8,127,0.0389055997133255
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,16,127,0.03563520014286041
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1024,1,64,127,0.03515680134296417
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1024,1,1,255,0.1141968011856079
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,32,127,0.03500800132751465
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1024,1,2,255,0.11912480592727662
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1024,1,4,255,0.0798255980014801
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1024,1,8,255,0.07193120121955872
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1024,1,16,255,0.06973599791526794
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1024,1,32,255,0.06948959827423096
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1024,1,64,255,0.06743519902229309
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,1,255,0.09036960005760193
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,2,255,0.08370879888534546
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,4,255,0.05362399816513062
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1024,1,1,511,0.16665600538253783
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,8,255,0.04314720034599304
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1024,1,2,511,0.19381920099258423
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,16,255,0.041168001294136045
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,32,255,0.04012640118598938
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1024,1,64,255,0.040587198734283444
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1024,1,4,511,0.13344000577926635
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1024,1,8,511,0.12175199985504151
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1024,1,16,511,0.11964479684829712
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1024,1,64,511,0.1166640043258667
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1024,1,32,511,0.11704959869384765
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,1,511,0.11391839981079102
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,2,511,0.13237919807434081
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,4,511,0.07839360237121581
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,8,511,0.06554399728775025
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,16,511,0.06333760023117066
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,32,511,0.061793601512908934
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1024,1,64,511,0.06145439743995666
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1024,1,1,1023,0.27634880542755125
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1024,1,2,1023,0.3393327951431274
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1024,1,4,1023,0.22822558879852295
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1024,1,8,1023,0.21770880222320557
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1024,1,16,1023,0.215828800201416
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1024,1,32,1023,0.21402719020843505
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1024,1,64,1023,0.21451680660247802
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,1,1023,0.16764479875564575
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,2,1023,0.18763999938964843
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,4,1023,0.12135200500488282
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1024,1,64,1023,0.10436639785766602
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,8,1023,0.10981600284576416
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,16,1023,0.10684319734573364
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,32,1023,0.10514559745788574
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1024,1,1,2047,0.49453439712524416
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1024,1,2,2047,0.6183167934417725
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1024,1,4,2047,0.42478718757629397
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1024,1,16,2047,0.40945119857788087
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1024,1,8,2047,0.4125472068786621
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1024,1,32,2047,0.4090559959411621
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,1,1024,1,64,2047,0.4078527927398682
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,1,2047,0.2801408052444458
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,2,2047,0.31602399349212645
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,4,2047,0.21003999710083007
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,8,2047,0.19754719734191895
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1,1,1,1,0.010633599758148194
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,16,2047,0.19422559738159179
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,32,2047,0.19280320405960083
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,2,1,0.010486400127410889
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,4,1,0.010496000200510025
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,16,1,0.010310400277376175
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,8,1,0.010576000064611435
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,1,1024,1,64,2047,0.19132959842681885
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,32,1,0.010409600287675857
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1,1,1,1,0.010382399708032609
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,16,1,0.008876799792051315
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,64,1,0.010428799688816071
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,2,1,0.010527999699115753
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,4,1,0.010471999645233154
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,8,3,0.010500799864530563
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,8,1,0.010419200360774993
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,32,1,0.010393600165843963
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,64,1,0.008463999629020691
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1,1,1,3,0.010414399951696397
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1,1,1,3,0.010492800176143647
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,2,3,0.011553599685430526
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,4,3,0.011340799927711486
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,16,3,0.010499200224876404
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,32,3,0.010462400317192078
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,64,3,0.0104032002389431
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,4,3,0.010444799810647965
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,2,3,0.010447999835014344
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,8,3,0.010371199995279311
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,16,3,0.010411199927330018
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,16,7,0.010326399654150008
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,32,3,0.010097599774599075
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,64,3,0.00862239971756935
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1,1,1,7,0.010471999645233154
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,2,7,0.01048320010304451
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,4,7,0.01106560006737709
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,8,7,0.010449600219726563
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,16,7,0.010420800000429154
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,32,7,0.010384000092744827
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,64,7,0.010411199927330018
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1,1,1,15,0.01058880016207695
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1,1,1,7,0.010388799756765366
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,2,7,0.010470400005578995
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,4,7,0.010436800122261048
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,8,7,0.010388799756765366
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,32,7,0.009355200082063675
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,64,7,0.009782399982213974
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,2,15,0.010593599826097488
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,4,15,0.01061440035700798
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,8,15,0.010603199899196624
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,16,15,0.010385599732398988
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,16,15,0.010407999902963639
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,32,15,0.010371199995279311
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,64,15,0.010407999902963639
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1,1,1,15,0.010503999888896942
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,2,15,0.010409600287675857
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,4,15,0.010411199927330018
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,8,15,0.010393600165843963
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,32,15,0.009033600240945816
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,64,15,0.010438399761915207
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1,1,1,31,0.010513599961996078
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1,1,1,31,0.01043360009789467
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,2,31,0.010691200196743012
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,4,31,0.010900799930095673
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,8,31,0.01040479987859726
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,16,31,0.009388799965381622
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,32,31,0.010425599664449692
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,64,31,0.009452799707651139
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,2,31,0.010425599664449692
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,4,31,0.010452800244092942
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,8,31,0.010411199927330018
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,16,31,0.008540800213813782
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,32,31,0.010372799634933472
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,64,31,0.008532799780368805
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1,1,1,63,0.011574400216341018
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,2,63,0.010496000200510025
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,4,63,0.010566399991512298
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,8,63,0.010500799864530563
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,16,63,0.010440000146627427
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,32,63,0.009974399954080582
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,64,63,0.010550399869680404
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1,1,1,63,0.010595200210809707
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,2,63,0.010438399761915207
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,4,63,0.010596799850463866
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,8,63,0.010407999902963639
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,16,63,0.0104032002389431
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,32,63,0.010401599854230881
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,64,63,0.010407999902963639
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,32,127,0.010358399897813796
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1,1,1,127,0.010566399991512298
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,2,127,0.010993599891662598
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,4,127,0.010516799986362457
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,8,127,0.010467199981212616
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,16,127,0.01037919968366623
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1,1,1,127,0.010369600355625152
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,64,127,0.01043360009789467
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,2,127,0.010420800000429154
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,4,127,0.010462400317192078
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,2,255,0.010598400235176086
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,8,127,0.010215999931097031
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,16,127,0.00870240032672882
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,32,127,0.010407999902963639
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,64,127,0.010388799756765366
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1,1,1,255,0.011766400188207626
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,4,255,0.010793600231409073
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,8,255,0.01048320010304451
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,16,255,0.009622400254011154
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,32,255,0.009139200299978256
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,64,255,0.010360000282526016
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1,1,1,255,0.010468800365924836
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,2,255,0.010438399761915207
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,4,255,0.010424000024795533
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,8,255,0.01037440001964569
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,16,255,0.008481600135564805
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,16,511,0.012214399874210358
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,32,255,0.00936800017952919
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,64,255,0.008489599823951722
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1,1,1,511,0.012649600207805634
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1,1,1,511,0.013619199395179749
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,2,511,0.01348160058259964
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,4,511,0.01334560066461563
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,8,511,0.012542399764060973
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,32,511,0.011675199866294861
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,32,511,0.01053600013256073
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,64,511,0.012414400279521943
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,2,511,0.01255359947681427
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,4,511,0.012563200294971466
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,8,511,0.0124208003282547
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,16,511,0.011339200288057327
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,64,511,0.011099199950695037
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1,1,1,1023,0.014668799936771393
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,2,1023,0.014183999598026275
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,4,1023,0.014523200690746307
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,4,1023,0.012459199875593185
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,8,1023,0.012559999525547028
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,16,1023,0.010462400317192078
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,16,1023,0.012390399724245072
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,32,1023,0.01242400035262108
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,64,1023,0.012532800436019897
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1,1,1,1023,0.012732799351215362
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,2,1023,0.012638400495052337
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,8,1023,0.01250240057706833
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,32,1023,0.010908800363540649
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,64,1023,0.010523200035095215
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1,1,1,2047,0.01660960018634796
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,2,2047,0.01467519998550415
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,4,2047,0.014584000408649444
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,8,2047,0.014494399726390838
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,16,2047,0.012518399953842163
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,32,2047,0.012563200294971466
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,32,2047,0.012412799894809723
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,64,2047,0.012537600100040435
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1,1,1,2047,0.01464959979057312
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,2,2047,0.013291199505329133
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,4,2047,0.012582400441169738
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,8,2047,0.01255359947681427
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,16,2047,0.012464000284671784
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,64,2047,0.01252640038728714
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1,1,1,4095,0.02060000002384186
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1,1,1,4095,0.016646400094032288
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,2,4095,0.018785600364208222
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,2,4095,0.016599999368190767
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,4,4095,0.016542400419712066
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,8,4095,0.014593599736690522
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,16,4095,0.012540799379348756
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,64,4095,0.014532800018787383
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,32,4095,0.014584000408649444
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,4,4095,0.014575999975204468
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,8,4095,0.014609600603580474
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,16,4095,0.013939200341701508
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,32,4095,0.012545600533485413
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,64,4095,0.012664000689983367
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,64,8191,0.01672320067882538
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1,1,1,8191,0.026675200462341307
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,2,8191,0.01764480024576187
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,2,8191,0.020678399503231047
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,4,8191,0.016631999611854555
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,4,8191,0.020755200088024138
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,8,8191,0.018595199286937713
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,16,8191,0.01661760061979294
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,32,8191,0.016599999368190767
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1,1,1,8191,0.020712000131607056
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,8,8191,0.016758400201797485
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,16,8191,0.016550399363040924
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,8,16383,0.022782400250434875
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,32,8191,0.015272000432014465
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,16,16383,0.020755200088024138
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,64,8191,0.01658879965543747
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1,1,1,16383,0.039124798774719236
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,2,16383,0.027291199564933775
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,4,16383,0.024087999761104584
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,32,16383,0.020956799387931824
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,64,16383,0.020759999752044678
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,32,16383,0.01865759938955307
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1,1,1,16383,0.026814401149749756
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,2,16383,0.02064319998025894
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,4,16383,0.020759999752044678
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,8,16383,0.020640000700950623
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,16,16383,0.018649600446224213
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,64,16383,0.018671999871730804
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1,1,1,32767,0.03291999995708465
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,2,32767,0.04043839871883392
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,4,32767,0.03089439868927002
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,8,32767,0.028115200996398925
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,16,32767,0.02683199942111969
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,32,32767,0.02690880000591278
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,64,32767,0.02651360034942627
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1,1,1,32767,0.03372960090637207
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,2,32767,0.02728799879550934
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,4,32767,0.024769599735736846
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,8,32767,0.024809600412845613
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,16,32767,0.02476799935102463
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,32,32767,0.024607999622821806
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,64,32767,0.024267199635505676
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1,1,1,65535,0.037062400579452516
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,2,65535,0.04025599956512451
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,4,65535,0.04506880044937134
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,4,65535,0.028838399052619933
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,8,65535,0.03519040048122406
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,16,65535,0.03504959940910339
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,32,65535,0.035278400778770445
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,64,65535,0.036796799302101134
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1,1,1,65535,0.035576000809669495
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,2,65535,0.030947199463844298
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1,1,2,131071,0.059627199172973634
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,8,65535,0.028948798775672913
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,16,65535,0.028188800811767577
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,32,65535,0.027088001370429993
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1,1,32,131071,0.055009597539901735
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,64,65535,0.026815998554229736
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1,1,1,131071,0.05502560138702393
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1,1,4,131071,0.07124959826469421
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1,1,16,131071,0.05331519842147827
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1,1,8,131071,0.0535968005657196
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1,1,64,131071,0.05575360059738159
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1,1,1,131071,0.042427200078964236
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1,1,2,131071,0.03919520080089569
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1,1,4,131071,0.041371199488639834
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1,1,8,131071,0.0350847989320755
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,8,1,0.01058880016207695
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1,1,16,131071,0.03320800065994263
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1,1,32,131071,0.03275200128555298
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1,1,64,131071,0.033024001121521
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,2,1,1,1,0.012468799948692322
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,2,1,0.010395199805498124
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,2,1,0.010601600259542465
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,4,1,0.010425599664449692
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,4,1,0.011631999909877778
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,32,1,0.010366400331258773
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,64,1,0.010523200035095215
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,16,1,0.010470400005578995
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,64,1,0.010518400371074677
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,2,1,1,1,0.010467199981212616
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,8,1,0.010372799634933472
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,16,1,0.01037919968366623
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,32,1,0.010395199805498124
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,2,1,1,3,0.01242239996790886
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,2,3,0.011606399714946748
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,4,3,0.010548800230026245
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,8,3,0.010470400005578995
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,16,3,0.010388799756765366
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,32,3,0.010384000092744827
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,64,3,0.0104032002389431
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,32,3,0.010393600165843963
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,2,1,1,3,0.01040479987859726
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,2,3,0.010395199805498124
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,4,3,0.010406400263309478
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,8,3,0.010494399815797806
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,16,3,0.00851999968290329
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,32,7,0.010489600151777268
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,64,3,0.00931520015001297
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,2,1,1,7,0.012436799705028534
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,2,7,0.010580799728631973
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,4,7,0.010496000200510025
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,8,7,0.010460799932479859
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,16,7,0.010424000024795533
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,64,7,0.010387200117111205
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,2,1,1,7,0.010503999888896942
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,2,1,1,15,0.010601600259542465
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,2,7,0.010468800365924836
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,4,15,0.010691200196743012
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,4,7,0.010526400059461594
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,8,7,0.01034879982471466
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,16,15,0.010580799728631973
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,16,7,0.010380800068378448
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,32,7,0.010391999781131745
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,64,7,0.010328000038862228
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,2,15,0.010592000186443329
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,8,15,0.010468800365924836
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,32,15,0.010409600287675857
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,16,15,0.008980800211429597
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,64,15,0.01045759990811348
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,2,1,1,15,0.0104592002928257
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,2,15,0.010475199669599533
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,4,15,0.010542400181293488
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,8,15,0.010463999956846238
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,32,15,0.00867680013179779
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,64,15,0.00928800031542778
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,2,1,1,31,0.012428800016641617
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,2,31,0.0127920001745224
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,4,31,0.011043199896812439
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,8,31,0.010809600353240967
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,16,31,0.010494399815797806
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,32,31,0.010409600287675857
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,16,31,0.008619199693202972
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,64,31,0.010497599840164185
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,2,1,1,31,0.010471999645233154
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,2,31,0.010411199927330018
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,4,31,0.01048320010304451
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,8,31,0.010387200117111205
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,32,31,0.009043200314044953
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,64,31,0.00857120007276535
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,2,1,1,63,0.010710400342941285
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,2,63,0.011999999731779098
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,4,63,0.010529600083827972
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,8,63,0.010548800230026245
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,16,63,0.010449600219726563
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,32,63,0.010428799688816071
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,64,63,0.010409600287675857
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,2,1,1,63,0.010444799810647965
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,2,63,0.010596799850463866
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,4,63,0.010454399883747101
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,8,63,0.010526400059461594
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,8,127,0.011195199936628342
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,16,63,0.010499200224876404
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,32,63,0.010435199737548828
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,64,127,0.010500799864530563
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,64,63,0.009086400270462036
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,2,1,1,127,0.010807999968528747
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,2,127,0.010732799768447876
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,4,127,0.010704000294208527
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,16,127,0.010345599800348281
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,32,127,0.010398399829864503
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,2,1,1,127,0.010395199805498124
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,2,127,0.010454399883747101
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,2,255,0.01061440035700798
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,4,127,0.01040000021457672
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,8,127,0.010463999956846238
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,16,127,0.009001599997282029
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,32,255,0.010395199805498124
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,32,127,0.008489599823951722
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,64,127,0.010363200306892395
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,2,1,1,255,0.012459199875593185
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,4,255,0.010388799756765366
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,4,255,0.011048000305891037
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,8,255,0.010425599664449692
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,32,255,0.010424000024795533
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,16,255,0.010377600044012069
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,2,1,1,511,0.01316480040550232
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,2,511,0.012481600046157837
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,64,255,0.010382399708032609
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,8,511,0.012428800016641617
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,2,1,1,255,0.010471999645233154
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,2,255,0.010422399640083313
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,8,255,0.0104032002389431
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,16,255,0.010387200117111205
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,64,255,0.010406400263309478
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,4,511,0.012452799826860428
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,16,511,0.01048479974269867
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,32,511,0.010412800312042236
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,64,511,0.010387200117111205
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,2,1,1,511,0.012547199428081513
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,2,511,0.01210239976644516
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,4,511,0.011630400270223617
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,8,511,0.010900799930095673
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,16,511,0.010496000200510025
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,32,511,0.010592000186443329
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,64,511,0.010391999781131745
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,2,1,1,1023,0.0144896000623703
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,2,1023,0.014263999462127686
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,4,1023,0.012644800543785095
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,8,1023,0.012606400251388549
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,16,1023,0.010611200332641601
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,32,1023,0.010480000078678131
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,64,1023,0.01045600026845932
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,2,1,1,1023,0.012531200051307678
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,2,1023,0.012534399330615998
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,4,1023,0.010740800201892853
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,8,1023,0.010913600027561188
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,16,1023,0.010500799864530563
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,32,1023,0.010462400317192078
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,64,1023,0.01045759990811348
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,2,1,1,2047,0.016715200245380403
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,2,2047,0.015212799608707427
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,4,2047,0.014563199877738953
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,8,2047,0.012542399764060973
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,16,2047,0.012439999729394913
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,32,2047,0.010494399815797806
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,64,2047,0.010454399883747101
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,2,1,1,2047,0.014724799990653991
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,2,2047,0.01451839953660965
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,4,2047,0.013264000415802002
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,8,2047,0.010523200035095215
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,16,2047,0.010503999888896942
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,32,2047,0.010500799864530563
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,64,2047,0.010524799674749374
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,2,1,1,4095,0.022724799811840057
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,2,4095,0.01857919991016388
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,4,4095,0.016651199758052827
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,8,4095,0.01664319932460785
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,16,4095,0.014556799829006196
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,32,4095,0.014511999487876893
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,64,4095,0.014660799503326416
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,2,1,1,4095,0.018675200641155243
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,2,4095,0.015755200386047365
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,4,4095,0.014593599736690522
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,8,4095,0.014744000136852264
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,16,4095,0.01446399986743927
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,32,4095,0.014523200690746307
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,64,4095,0.014467200636863709
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,2,1,1,8191,0.030972799658775328
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,64,8191,0.016816000640392303
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,2,8191,0.02285120040178299
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,4,8191,0.020692799985408784
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,8,8191,0.018755200505256652
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,16,8191,0.018620799481868743
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,32,8191,0.016761599481105803
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,2,1,1,8191,0.022358399629592896
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,2,8191,0.018751999735832213
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,4,8191,0.016935999691486358
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,8,8191,0.01658719927072525
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,16,8191,0.016652800142765045
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,32,8191,0.01671680063009262
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,64,8191,0.01664479970932007
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,2,1,1,16383,0.02824159860610962
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,2,16383,0.03136799931526184
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,4,16383,0.024956800043582916
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,2,16383,0.0227183997631073
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,8,16383,0.02370399981737137
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,16,16383,0.022752000391483305
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,32,16383,0.022763200104236603
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,64,16383,0.02279680073261261
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,2,1,1,16383,0.028254398703575136
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,4,16383,0.021835200488567352
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,8,16383,0.020795199275016784
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,16,16383,0.02104640007019043
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,32,16383,0.02073120027780533
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,64,16383,0.020531199872493744
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,2,1,1,32767,0.03468160033226013
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,2,32767,0.03713119924068451
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,4,32767,0.037110400199890134
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,8,32767,0.031038400530815125
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,16,32767,0.028944000601768494
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,32,32767,0.029252800345420837
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,64,32767,0.02892960011959076
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,2,1,1,32767,0.032467201352119446
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,2,32767,0.03095200061798096
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,4,32767,0.026412799954414368
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,8,32767,0.02677280008792877
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,16,32767,0.02629919946193695
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,32,32767,0.026800000667572023
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,64,32767,0.024854399263858795
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,2,1,1,65535,0.04979360103607178
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,2,65535,0.05716000199317932
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,4,65535,0.05697600245475769
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,8,65535,0.052249598503112796
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,16,65535,0.05308960080146789
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,32,65535,0.05583999752998352
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,64,65535,0.0533407986164093
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,2,1,1,65535,0.036723199486732486
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,2,65535,0.03560320138931274
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,4,65535,0.033155199885368344
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,8,65535,0.030870398879051207
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,16,65535,0.030876800417900085
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,32,65535,0.030665600299835206
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,64,65535,0.028884801268577575
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,2,1,1,131071,0.07676479816436768
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,2,1,64,131071,0.07786080241203308
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,2,1,2,131071,0.08373600244522095
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,2,1,8,131071,0.07870240211486816
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,2,1,4,131071,0.09283199906349182
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,2,1,16,131071,0.07783039808273315
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,2,1,32,131071,0.07939199805259704
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,2,1,32,131071,0.047758400440216064
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,2,1,1,131071,0.05172320008277893
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,2,1,2,131071,0.05340480208396912
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,2,1,4,131071,0.052716797590255736
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,2,1,8,131071,0.04582239985466004
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,2,1,16,131071,0.045788800716400145
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,2,1,64,131071,0.04713920056819916
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,4,1,1,1,0.012451200187206269
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,2,1,0.010518400371074677
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,4,1,0.010583999752998351
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,8,1,0.010492800176143647
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,16,1,0.010675200074911118
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,32,1,0.010468800365924836
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,64,1,0.010523200035095215
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,4,1,1,1,0.01215839982032776
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,2,1,0.010380800068378448
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,4,1,0.010369600355625152
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,8,1,0.010406400263309478
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,16,1,0.010475199669599533
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,32,1,0.010468800365924836
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,64,1,0.010440000146627427
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,4,1,1,3,0.012481600046157837
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,2,3,0.01056160032749176
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,4,3,0.010470400005578995
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,8,3,0.011126399785280228
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,4,3,0.010503999888896942
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,16,3,0.010470400005578995
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,32,3,0.010503999888896942
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,64,3,0.010497599840164185
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,4,1,1,3,0.011846400052309036
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,2,3,0.010552000254392624
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,8,3,0.010470400005578995
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,16,3,0.010407999902963639
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,32,3,0.00888800024986267
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,64,3,0.010422399640083313
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,4,1,1,7,0.012542399764060973
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,2,7,0.012439999729394913
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,4,7,0.01091040000319481
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,8,7,0.010527999699115753
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,16,7,0.010601600259542465
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,8,7,0.010513599961996078
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,32,7,0.010435199737548828
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,64,7,0.010503999888896942
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,4,1,1,7,0.012439999729394913
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,2,7,0.010497599840164185
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,4,7,0.010440000146627427
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,16,7,0.010366400331258773
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,32,7,0.01040479987859726
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,64,7,0.008958400040864945
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,4,1,1,15,0.012636800110340119
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,2,15,0.012449599802494049
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,4,15,0.010689599812030793
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,8,15,0.010619200021028518
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,16,15,0.010515200346708298
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,32,15,0.010471999645233154
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,64,15,0.010487999767065048
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,4,1,1,15,0.011051200330257416
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,2,15,0.010608000308275222
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,4,15,0.010476800054311753
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,8,15,0.010499200224876404
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,16,15,0.010385599732398988
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,32,15,0.009147199988365173
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,64,15,0.010417599976062775
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,64,31,0.010384000092744827
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,4,1,1,31,0.012443199753761292
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,2,31,0.01063840016722679
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,4,31,0.010676799714565277
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,8,31,0.011553599685430526
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,16,31,0.010494399815797806
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,32,31,0.010503999888896942
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,4,1,1,31,0.010596799850463866
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,2,31,0.010473600029945374
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,2,63,0.012455999851226807
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,4,31,0.010468800365924836
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,8,31,0.010377600044012069
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,16,31,0.010462400317192078
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,32,31,0.01040000021457672
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,64,31,0.010471999645233154
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,4,1,1,63,0.012542399764060973
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,4,63,0.010820800065994262
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,8,63,0.010460799932479859
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,16,63,0.010465600341558457
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,32,63,0.01040000021457672
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,64,63,0.010473600029945374
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,4,1,1,63,0.010630399733781815
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,2,63,0.010487999767065048
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,4,63,0.0104032002389431
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,8,63,0.010438399761915207
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,16,63,0.010337600111961364
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,32,63,0.010369600355625152
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,64,63,0.01040000021457672
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,64,127,0.010366400331258773
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,4,1,1,127,0.012520000338554382
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,2,127,0.012455999851226807
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,4,127,0.010387200117111205
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,4,127,0.01050880029797554
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,8,127,0.011151999980211259
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,16,127,0.01045759990811348
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,64,127,0.01050880029797554
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,32,127,0.010449600219726563
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,4,1,1,127,0.01085439994931221
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,2,127,0.010419200360774993
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,8,127,0.010428799688816071
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,16,127,0.010443200170993806
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,32,127,0.010503999888896942
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,4,1,1,255,0.012457600235939026
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,2,255,0.012404800206422806
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,4,255,0.010699199885129929
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,8,255,0.010523200035095215
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,4,255,0.01045600026845932
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,8,255,0.010502400249242783
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,16,255,0.010585600137710571
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,32,255,0.010412800312042236
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,64,255,0.01048479974269867
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,2,255,0.010524799674749374
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,4,1,1,511,0.014504000544548035
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,4,1,1,255,0.010739199817180634
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,16,255,0.010444799810647965
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,16,511,0.01096320003271103
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,32,255,0.010401599854230881
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,32,511,0.010478399693965912
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,64,255,0.010412800312042236
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,2,511,0.012734399735927581
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,4,511,0.012531200051307678
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,8,511,0.012615999579429627
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,64,511,0.010505600273609162
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,4,1,1,511,0.012481600046157837
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,2,511,0.01250240057706833
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,4,511,0.012467200309038163
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,8,511,0.01048159971833229
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,16,511,0.010388799756765366
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,32,511,0.010420800000429154
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,64,511,0.010527999699115753
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,4,1,1,1023,0.016527999937534333
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,2,1023,0.01454399973154068
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,4,1023,0.014472000300884247
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,2,1023,0.012636800110340119
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,8,1023,0.012561599910259246
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,8,1023,0.010497599840164185
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,4,1023,0.012521600723266602
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,16,1023,0.012380799651145935
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,32,1023,0.010420800000429154
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,64,1023,0.010673599690198899
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,4,1,1,1023,0.014553600549697876
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,16,1023,0.010552000254392624
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,32,1023,0.01045600026845932
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,64,1023,0.010489600151777268
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,4,1,1,2047,0.02064799964427948
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,2,2047,0.016652800142765045
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,2,2047,0.014655999839305878
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,4,2047,0.016564799845218657
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,8,2047,0.015107199549674988
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,16,2047,0.014620800316333771
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,32,2047,0.014528000354766845
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,64,2047,0.01379680037498474
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,4,1,1,2047,0.01653600037097931
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,4,2047,0.01443839967250824
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,8,2047,0.01464959979057312
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,16,2047,0.012620800733566284
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,32,2047,0.012561599910259246
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,32,4095,0.014655999839305878
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,64,2047,0.012537600100040435
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,4,1,1,4095,0.029032000899314882
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,2,4095,0.02267040014266968
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,4,4095,0.01857759952545166
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,8,4095,0.016646400094032288
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,16,4095,0.01663679927587509
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,64,4095,0.014707200229167938
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,4,1,1,4095,0.02074880003929138
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,2,4095,0.01870719939470291
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,4,4095,0.015992000699043274
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,8,4095,0.014524799585342408
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,16,4095,0.014584000408649444
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,16,8191,0.020428800582885744
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,32,4095,0.014504000544548035
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,32,8191,0.01871200054883957
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,64,4095,0.013172799348831176
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,4,1,1,8191,0.026795199513435362
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,2,8191,0.030979201197624207
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,4,8191,0.02300959974527359
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,8,8191,0.02064799964427948
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,64,8191,0.018721599876880646
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,4,1,1,8191,0.023636800050735474
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,2,8191,0.02268799990415573
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,4,8191,0.018779200315475465
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,8,8191,0.017664000391960144
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,16,8191,0.016700799763202667
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,32,8191,0.016630400717258454
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,64,8191,0.016524800658226015
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,4,1,1,16383,0.03189600110054016
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,2,16383,0.032971200346946714
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,4,16383,0.03496319949626923
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,8,16383,0.026707199215888978
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,16,16383,0.024833600223064422
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,32,16383,0.02494560033082962
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,64,16383,0.024843199551105498
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,4,1,1,16383,0.028302401304244995
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,2,16383,0.02871359884738922
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,4,1,1,32767,0.04923200011253357
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,4,16383,0.022888000309467315
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,8,16383,0.020791999995708466
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,16,16383,0.020692799985408784
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,32,16383,0.020662400126457214
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,64,16383,0.02078240066766739
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,2,32767,0.05191519856452942
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,4,32767,0.05767679810523987
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,8,32767,0.04939199984073639
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,16,32767,0.048783999681472776
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,32,32767,0.04916160106658936
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,4,1,1,32767,0.034488001465797426
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,64,32767,0.047860801219940186
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,2,32767,0.030988800525665283
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,4,32767,0.03309119939804077
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,4,1,1,65535,0.07671200037002564
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,8,32767,0.025491198897361754
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,16,32767,0.02480800002813339
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,32,32767,0.02614719867706299
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,64,32767,0.0248416006565094
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,2,65535,0.0788976013660431
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,64,65535,0.07197120189666747
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,4,65535,0.0949679970741272
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,8,65535,0.07390239834785461
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,16,65535,0.07379519939422607
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,32,65535,0.07253280282020569
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,4,1,1,65535,0.05044159889221191
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,2,65535,0.048670399188995364
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,4,65535,0.05225759744644165
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,8,65535,0.04313279986381531
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,16,65535,0.04171839952468872
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,32,65535,0.04243200123310089
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,64,65535,0.042008000612258914
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,4,1,1,131071,0.1362336039543152
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,4,1,16,131071,0.12400640249252319
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,4,1,2,131071,0.13412959575653077
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,4,1,4,131071,0.16613119840621948
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,4,1,8,131071,0.12575520277023317
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,4,1,32,131071,0.12558560371398925
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,4,1,64,131071,0.12338399887084961
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,4,1,1,131071,0.08360639810562134
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,4,1,2,131071,0.07202879786491394
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,4,1,64,131071,0.06363679766654969
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,4,1,4,131071,0.0842736005783081
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,4,1,8,131071,0.06380640268325806
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,4,1,16,131071,0.06354399919509887
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,4,1,32,131071,0.06444799900054932
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,8,1,1,1,0.012591999769210816
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,2,1,0.01244800016283989
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,8,1,1,1,0.01242400035262108
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,4,1,0.0124719999730587
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,8,1,0.010420800000429154
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,8,1,0.010463999956846238
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,16,1,0.01045759990811348
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,32,1,0.01048159971833229
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,64,1,0.010424000024795533
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,2,1,0.01048479974269867
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,4,1,0.01040000021457672
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,16,1,0.010401599854230881
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,16,3,0.010491199791431427
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,32,1,0.01043360009789467
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,64,1,0.010417599976062775
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,8,1,1,3,0.01260959953069687
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,4,3,0.012454400211572647
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,8,3,0.010473600029945374
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,2,3,0.01244800016283989
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,32,3,0.010452800244092942
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,64,3,0.01048479974269867
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,8,1,1,3,0.012441600114107132
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,2,3,0.012511999905109405
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,2,7,0.01250240057706833
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,4,3,0.010436800122261048
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,4,7,0.010699199885129929
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,8,3,0.010407999902963639
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,16,3,0.009534399956464767
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,32,3,0.010363200306892395
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,64,3,0.010388799756765366
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,8,1,1,7,0.012588800489902496
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,8,7,0.011113599687814713
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,16,7,0.01050880029797554
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,32,7,0.010454399883747101
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,32,7,0.01037440001964569
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,64,7,0.010487999767065048
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,8,1,1,7,0.012617599964141846
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,2,7,0.012406399846076966
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,4,15,0.010571199655532836
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,4,7,0.010414399951696397
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,8,7,0.010391999781131745
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,32,15,0.01048320010304451
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,16,7,0.010471999645233154
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,64,7,0.010377600044012069
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,8,1,1,15,0.012566399574279786
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,2,15,0.012439999729394913
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,8,15,0.012225600332021714
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,16,15,0.010580799728631973
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,64,15,0.01051200032234192
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,8,1,1,15,0.012467200309038163
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,2,15,0.011687999963760376
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,4,15,0.010387200117111205
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,4,31,0.010620799660682679
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,8,15,0.010532800108194351
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,16,31,0.010499200224876404
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,16,15,0.010387200117111205
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,32,15,0.010344000160694122
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,64,15,0.010435199737548828
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,8,1,1,31,0.012478400021791458
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,2,31,0.01255359947681427
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,8,31,0.01058719977736473
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,32,31,0.010454399883747101
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,64,31,0.010515200346708298
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,8,1,1,31,0.01247360035777092
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,2,31,0.010447999835014344
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,4,31,0.010436800122261048
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,8,31,0.010465600341558457
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,4,63,0.011713600158691407
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,16,31,0.010478399693965912
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,32,31,0.010777600109577179
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,64,31,0.01037440001964569
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,8,1,1,63,0.012583999335765839
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,2,63,0.012507200241088867
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,8,63,0.010497599840164185
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,16,63,0.010492800176143647
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,64,63,0.01050880029797554
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,32,63,0.010500799864530563
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,8,1,1,63,0.012449599802494049
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,2,63,0.010609599947929382
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,4,63,0.010473600029945374
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,8,63,0.010401599854230881
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,16,63,0.010420800000429154
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,32,63,0.0104592002928257
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,64,63,0.010417599976062775
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,8,1,1,127,0.012534399330615998
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,2,127,0.01247360035777092
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,4,127,0.012436799705028534
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,8,127,0.011078400164842605
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,16,127,0.010460799932479859
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,32,127,0.010683199763298035
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,64,127,0.01053600013256073
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,8,1,1,127,0.012467200309038163
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,2,127,0.010590399801731109
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,4,127,0.010569600015878677
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,8,127,0.010571199655532836
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,16,127,0.009547200053930283
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,32,127,0.010390400141477584
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,64,127,0.010412800312042236
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,8,1,1,255,0.012588800489902496
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,2,255,0.012561599910259246
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,4,255,0.012454400211572647
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,8,255,0.012003199756145477
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,16,255,0.010451199859380722
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,32,255,0.01055999994277954
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,64,255,0.010527999699115753
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,8,1,1,255,0.011377599835395814
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,2,255,0.010496000200510025
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,4,255,0.0105103999376297
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,8,255,0.010416000336408614
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,16,255,0.01051200032234192
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,32,255,0.010419200360774993
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,64,255,0.010556799918413162
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,8,1,1,511,0.016681599617004394
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,2,511,0.014734399318695069
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,4,511,0.013153600692749023
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,8,511,0.012675200402736665
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,16,511,0.012455999851226807
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,32,511,0.012443199753761292
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,64,511,0.01045759990811348
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,8,1,1,511,0.014521600306034088
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,2,511,0.012547199428081513
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,4,511,0.012534399330615998
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,8,511,0.01053439974784851
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,16,511,0.010497599840164185
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,32,511,0.010414399951696397
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,64,511,0.010393600165843963
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,64,1023,0.01257600039243698
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,8,1,1,1023,0.020692799985408784
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,2,1023,0.016601599752902985
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,4,1023,0.012571200728416443
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,4,1023,0.01454399973154068
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,8,1023,0.01451520025730133
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,16,1023,0.01268640011548996
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,32,1023,0.012643200159072877
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,8,1,1,1023,0.016542400419712066
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,2,1023,0.01451520025730133
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,8,1023,0.012484800070524216
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,16,1023,0.011297599971294403
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,32,1023,0.011524800211191177
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,64,1023,0.011020799726247787
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,8,1,1,2047,0.027500799298286437
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,8,1,1,2047,0.020791999995708466
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,2,2047,0.020688000321388244
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,4,2047,0.016616000235080718
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,8,2047,0.016564799845218657
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,16,2047,0.014683200418949128
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,32,2047,0.014584000408649444
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,64,2047,0.014519999921321868
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,2,2047,0.01658719927072525
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,4,2047,0.014628799259662628
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,8,2047,0.01449279934167862
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,16,2047,0.01446560025215149
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,32,2047,0.012470400333404541
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,64,2047,0.012612800300121307
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,8,1,1,4095,0.024771200120449068
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,2,4095,0.0290367990732193
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,4,4095,0.02277279943227768
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,8,4095,0.01858240067958832
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,16,4095,0.01857919991016388
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,16,4095,0.014720000326633453
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,32,4095,0.01788160055875778
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,64,4095,0.01658879965543747
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,8,1,1,4095,0.02171359956264496
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,4,4095,0.018824000656604768
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,2,4095,0.02073120027780533
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,8,4095,0.01650879979133606
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,32,4095,0.015000000596046448
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,64,4095,0.014508800208568573
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,8,1,1,8191,0.03102560043334961
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,2,8191,0.028988799452781676
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,4,8191,0.034944000840187076
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,8,8191,0.024732799828052522
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,16,8191,0.022776000201702118
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,32,8191,0.02271520048379898
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,64,8191,0.02280319929122925
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,8,1,1,8191,0.026571199297904968
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,2,8191,0.024707199633121492
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,4,8191,0.022777600586414336
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,8,8191,0.01868640035390854
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,16,8191,0.018745599687099455
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,32,8191,0.018639999628067016
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,64,8191,0.018534399569034576
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,8,1,1,16383,0.048456001281738284
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,2,16383,0.05140479803085327
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,4,16383,0.05762240290641785
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,8,16383,0.0455951988697052
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,16,16383,0.046054399013519286
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,32,16383,0.04630720019340515
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,64,16383,0.04532960057258606
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,8,1,1,16383,0.035025599598884585
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,2,16383,0.030982398986816408
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,4,16383,0.03297759890556336
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,8,16383,0.02327519953250885
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,16,16383,0.022830399870872497
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,32,16383,0.022782400250434875
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,64,16383,0.02146719992160797
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,8,1,1,32767,0.07646880149841309
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,64,32767,0.0717087984085083
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,2,32767,0.08204799890518188
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,4,32767,0.09686400294303894
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,8,32767,0.07381759881973267
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,16,32767,0.07232480049133301
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,32,32767,0.07345280051231384
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,8,1,1,32767,0.049646401405334474
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,2,32767,0.04660319983959198
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,4,32767,0.05053439736366272
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,8,32767,0.04095999896526337
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,16,32767,0.040380799770355226
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,32,32767,0.040622401237487796
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,64,32767,0.039452800154685976
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,8,1,1,65535,0.13538399934768677
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,2,65535,0.13129279613494874
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,4,65535,0.1636031985282898
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,8,65535,0.124835205078125
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,16,65535,0.12278560400009156
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,32,65535,0.12759519815444947
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,64,65535,0.12534079551696778
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,8,1,1,65535,0.08456479907035827
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,2,65535,0.06999679803848266
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,4,65535,0.08290879726409912
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,8,65535,0.06366559863090515
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,16,65535,0.06270880103111268
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,32,65535,0.06313279867172242
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,64,65535,0.06384959816932678
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,8,1,1,131071,0.2555727958679199
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,8,1,2,131071,0.24550080299377441
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,8,1,4,131071,0.3037008047103882
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,8,1,64,131071,0.22790880203247071
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,8,1,8,131071,0.2285167932510376
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,8,1,16,131071,0.22185280323028564
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,8,1,4,131071,0.14869760274887084
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,8,1,32,131071,0.2288383960723877
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,8,1,1,131071,0.1556864023208618
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,8,1,32,131071,0.10563360452651978
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,8,1,2,131071,0.11950080394744873
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,8,1,8,131071,0.10665119886398315
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,8,1,16,131071,0.10528960227966308
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,16,1,1,1,0.014483200013637542
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,8,1,64,131071,0.10500320196151733
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,2,1,0.012476799637079239
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,4,1,0.012548799812793731
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,8,1,0.012433599680662155
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,16,1,0.011233600229024887
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,32,1,0.01050880029797554
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,64,1,0.010574399679899215
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,16,1,1,1,0.01257600039243698
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,2,1,0.012628799676895142
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,4,1,0.012468799948692322
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,8,1,0.010470400005578995
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,16,1,0.010518400371074677
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,32,1,0.010398399829864503
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,64,1,0.010393600165843963
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,16,1,1,3,0.014552000164985656
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,16,1,1,3,0.012859199941158295
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,2,3,0.012508800625801087
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,4,3,0.01244639977812767
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,8,3,0.012444800138473511
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,16,3,0.010529600083827972
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,32,3,0.010547199845314026
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,64,3,0.01048479974269867
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,2,3,0.012563200294971466
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,4,3,0.012411200255155564
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,8,3,0.010480000078678131
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,16,3,0.010523200035095215
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,32,3,0.010468800365924836
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,64,3,0.010371199995279311
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,16,1,1,7,0.014481599628925323
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,2,7,0.012748800218105316
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,4,7,0.012544000148773193
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,8,7,0.01122559979557991
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,16,7,0.011608000099658965
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,32,7,0.011009600013494492
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,64,7,0.01048479974269867
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,16,1,1,7,0.012443199753761292
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,2,7,0.012457600235939026
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,4,7,0.011832000315189361
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,8,7,0.010503999888896942
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,16,7,0.010420800000429154
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,32,7,0.010393600165843963
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,16,15,0.011503999680280685
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,64,7,0.010449600219726563
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,64,15,0.010516799986362457
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,16,1,1,15,0.014523200690746307
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,2,15,0.012462399899959564
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,4,15,0.012614400684833526
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,8,15,0.012324800342321396
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,32,15,0.010583999752998351
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,16,1,1,15,0.012563200294971466
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,2,15,0.010939200222492219
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,4,15,0.010574399679899215
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,2,31,0.012630400061607362
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,8,15,0.010664000362157821
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,8,31,0.012520000338554382
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,16,15,0.010369600355625152
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,32,15,0.01048320010304451
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,64,15,0.010475199669599533
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,16,1,1,31,0.01462399959564209
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,2,31,0.012521600723266602
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,4,31,0.012464000284671784
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,16,31,0.011416000127792359
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,32,31,0.010489600151777268
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,64,31,0.0104592002928257
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,16,1,1,31,0.012639999389648438
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,16,1,1,63,0.014761599898338317
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,8,31,0.010579200088977813
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,8,63,0.01074879989027977
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,4,31,0.011462400108575821
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,16,31,0.010492800176143647
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,32,31,0.01040479987859726
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,64,31,0.010393600165843963
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,2,63,0.012683199346065521
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,4,63,0.012491200119256973
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,16,63,0.010496000200510025
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,32,63,0.010463999956846238
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,64,63,0.010636799782514573
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,16,1,1,63,0.012484800070524216
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,2,63,0.011097600311040878
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,4,63,0.011088000237941742
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,8,63,0.0104032002389431
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,16,63,0.010385599732398988
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,32,63,0.01040479987859726
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,64,63,0.01040479987859726
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,16,1,1,127,0.01467519998550415
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,2,127,0.012545600533485413
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,4,127,0.012441600114107132
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,8,127,0.012409599870443344
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,16,127,0.010478399693965912
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,32,127,0.010496000200510025
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,64,127,0.010590399801731109
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,16,1,1,127,0.012571200728416443
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,2,127,0.012510399520397186
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,4,127,0.010419200360774993
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,8,127,0.010523200035095215
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,16,127,0.010465600341558457
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,32,127,0.01037919968366623
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,64,127,0.010516799986362457
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,64,255,0.010503999888896942
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,16,1,1,255,0.014616000652313232
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,2,255,0.012507200241088867
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,4,255,0.012563200294971466
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,8,255,0.012439999729394913
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,16,255,0.011977600306272507
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,32,255,0.010471999645233154
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,32,255,0.010784000158309937
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,16,1,1,255,0.012614400684833526
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,2,255,0.012542399764060973
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,4,255,0.010620799660682679
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,8,255,0.010532800108194351
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,16,255,0.010491199791431427
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,16,1,1,511,0.01860159933567047
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,64,255,0.010513599961996078
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,2,511,0.01652639955282211
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,4,511,0.014563199877738953
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,8,511,0.014460800588130951
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,16,511,0.012619200348854064
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,32,511,0.012537600100040435
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,64,511,0.012649600207805634
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,16,1,1,511,0.014644800126552582
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,2,511,0.01462399959564209
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,4,511,0.01252799928188324
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,8,511,0.012404800206422806
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,4,1023,0.016771200299263
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,16,511,0.011208000034093857
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,32,511,0.011627200245857238
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,64,511,0.011073599755764007
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,16,1,1,1023,0.02479040026664734
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,2,1023,0.019739200174808503
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,8,1023,0.014606399834156037
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,16,1023,0.014558400213718414
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,32,1023,0.01449279934167862
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,64,1023,0.014561599493026734
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,16,1,1,1023,0.018838399648666383
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,2,1023,0.01661760061979294
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,4,1023,0.014523200690746307
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,8,1023,0.012646399438381195
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,16,1023,0.012449599802494049
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,32,1023,0.012465599924325943
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,64,1023,0.012508800625801087
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,16,2047,0.016641600430011748
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,16,1,1,2047,0.02165600061416626
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,2,2047,0.029032000899314882
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,4,2047,0.020852799713611602
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,8,2047,0.016820800304412842
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,32,2047,0.016633599996566772
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,64,2047,0.0165583997964859
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,16,1,1,2047,0.018671999871730804
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,2,2047,0.020708799362182617
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,16,1,1,4095,0.029073598980903625
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,4,2047,0.01669439971446991
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,8,2047,0.01459839940071106
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,16,2047,0.014588800072669984
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,32,2047,0.014604799449443817
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,64,2047,0.014478400349617004
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,2,4095,0.028996801376342772
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,4,4095,0.030937600135803222
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,8,4095,0.0227743998169899
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,16,4095,0.022804799675941467
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,32,4095,0.022676800191402436
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,64,4095,0.020732800662517547
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,64,4095,0.016648000478744505
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,16,1,1,4095,0.024732799828052522
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,2,4095,0.022767999768257143
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,4,4095,0.020908799767494202
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,8,4095,0.01857759952545166
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,16,4095,0.017075200378894807
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,32,4095,0.01669279932975769
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,16,1,1,8191,0.04742240011692047
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,2,8191,0.050735998153686526
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,4,8191,0.055255997180938723
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,8,8191,0.045203199982643126
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,16,8191,0.044388800859451294
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,32,8191,0.04445759952068329
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,64,8191,0.04353919923305512
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,16,1,1,8191,0.03308480083942413
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,2,8191,0.028947201371192933
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,4,8191,0.030868801474571227
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,8,8191,0.022787199914455415
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,16,8191,0.021991999447345735
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,32,8191,0.022756800055503845
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,64,8191,0.02069920003414154
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,32,16383,0.07321119904518128
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,16,1,1,16383,0.07768160104751587
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,2,16383,0.07655199766159057
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,4,16383,0.09291679859161377
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,8,16383,0.0717087984085083
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,16,16383,0.0714352011680603
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,64,16383,0.06996319890022278
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,16,1,1,16383,0.05087199807167053
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,2,16383,0.04528000056743622
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,4,16383,0.04946720004081726
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,8,16383,0.04047040045261383
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,32,16383,0.03903999924659729
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,64,16383,0.03758080005645752
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,16,16383,0.03907040059566498
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,16,1,1,32767,0.13979040384292601
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,2,32767,0.13655519485473633
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,4,32767,0.17104480266571045
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,8,32767,0.12616959810256959
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,16,32767,0.12772159576416015
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,32,32767,0.12380479574203491
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,64,32767,0.1242400050163269
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,16,1,1,32767,0.08683680295944214
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,64,32767,0.06095520257949829
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,2,32767,0.06894400119781494
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,4,32767,0.08608160018920899
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,8,32767,0.06165279746055603
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,16,32767,0.06093599796295166
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,32,32767,0.06052640080451965
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,16,1,1,65535,0.26846880912780763
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,2,65535,0.23987200260162353
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,4,65535,0.32598559856414794
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,8,65535,0.23053760528564454
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,16,65535,0.23068640232086182
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,32,65535,0.22527360916137695
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,64,65535,0.22467041015625
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,16,1,1,65535,0.16057920455932617
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,2,65535,0.11881120204925537
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,4,65535,0.15523200035095214
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,8,65535,0.10535199642181396
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,16,65535,0.10466560125350952
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,32,65535,0.10395840406417847
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,64,65535,0.10438400506973267
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,16,1,1,131071,0.521830415725708
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,16,1,2,131071,0.45850558280944825
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,16,1,8,131071,0.4238719940185547
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,16,1,4,131071,0.6488671779632569
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,16,1,16,131071,0.4231103897094727
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,16,1,32,131071,0.4235856056213379
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,16,1,64,131071,0.4219696044921875
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,16,1,1,131071,0.3113840103149414
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,16,1,2,131071,0.225380802154541
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,16,1,4,131071,0.3016160011291504
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,16,1,8,131071,0.1904703974723816
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,16,1,16,131071,0.18920799493789672
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,32,1,1,1,0.014612799882888794
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,16,1,32,131071,0.18863840103149415
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,32,1,0.012492799758911132
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,2,1,0.014529600739479065
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,16,1,64,131071,0.188646399974823
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,4,1,0.012449599802494049
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,8,1,0.012510399520397186
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,16,1,0.012491200119256973
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,32,1,1,1,0.014727999269962311
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,64,1,0.01242239996790886
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,2,1,0.01451680064201355
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,4,1,0.01252480000257492
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,8,1,0.01210559979081154
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,16,1,0.010691200196743012
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,32,1,0.010574399679899215
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,64,1,0.010603199899196624
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,32,1,1,3,0.014532800018787383
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,2,3,0.014451199769973755
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,4,3,0.012486399710178375
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,8,3,0.012465599924325943
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,16,3,0.012464000284671784
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,32,3,0.012486399710178375
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,64,3,0.012433599680662155
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,32,1,1,3,0.014595200121402741
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,2,3,0.012534399330615998
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,4,3,0.012436799705028534
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,8,3,0.011751999706029892
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,16,3,0.010899200290441512
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,16,7,0.012415999919176102
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,32,3,0.01040479987859726
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,64,3,0.010444799810647965
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,2,7,0.014664000272750855
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,2,7,0.012617599964141846
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,32,1,1,7,0.014481599628925323
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,4,7,0.012559999525547028
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,8,7,0.012451200187206269
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,32,7,0.012436799705028534
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,64,7,0.010446400195360184
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,64,7,0.012495999783277511
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,32,1,1,15,0.014505599439144135
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,32,1,1,7,0.014588800072669984
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,4,7,0.012415999919176102
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,8,7,0.010700800269842149
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,16,7,0.010471999645233154
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,32,7,0.010500799864530563
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,2,15,0.014472000300884247
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,4,15,0.01263200044631958
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,8,15,0.01249760016798973
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,16,15,0.012457600235939026
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,32,15,0.01249760016798973
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,64,15,0.012454400211572647
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,32,1,1,15,0.014617599546909332
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,32,1,1,31,0.014678399264812469
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,2,15,0.012564800679683685
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,4,15,0.011088000237941742
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,8,15,0.010447999835014344
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,16,15,0.0105103999376297
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,32,15,0.010414399951696397
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,64,15,0.010487999767065048
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,2,31,0.014505599439144135
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,4,31,0.012734399735927581
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,8,31,0.012443199753761292
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,16,31,0.012439999729394913
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,32,31,0.012542399764060973
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,64,31,0.012545600533485413
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,32,1,1,31,0.014470399916172027
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,2,31,0.01255359947681427
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,4,31,0.011300799995660782
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,8,31,0.010467199981212616
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,8,63,0.01244639977812767
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,16,31,0.010411199927330018
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,16,63,0.012571200728416443
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,32,63,0.012569600343704223
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,32,31,0.010611200332641601
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,64,31,0.010515200346708298
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,32,1,1,63,0.014563199877738953
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,2,63,0.01449120044708252
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,4,63,0.013169600069522858
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,64,63,0.012481600046157837
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,32,1,1,63,0.014604799449443817
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,2,63,0.012508800625801087
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,4,63,0.012465599924325943
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,8,63,0.010609599947929382
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,16,63,0.010393600165843963
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,32,63,0.010393600165843963
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,64,63,0.010521599650382995
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,32,1,1,127,0.014614400267601014
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,2,127,0.014689600467681885
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,4,127,0.012915199995040894
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,8,127,0.012638400495052337
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,16,127,0.012432000041007996
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,32,127,0.012558400630950928
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,64,127,0.012548799812793731
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,32,127,0.010427200049161912
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,32,1,1,127,0.014572800695896148
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,64,127,0.010416000336408614
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,2,127,0.012540799379348756
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,4,127,0.012444800138473511
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,8,127,0.0106175996363163
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,16,127,0.010601600259542465
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,32,1,1,255,0.01907680034637451
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,2,255,0.01467519998550415
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,4,255,0.01250080019235611
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,8,255,0.012455999851226807
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,16,255,0.012518399953842163
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,32,255,0.012428800016641617
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,64,255,0.012454400211572647
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,32,1,1,255,0.01860480010509491
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,2,255,0.012580800056457519
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,4,255,0.01239520013332367
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,2,511,0.01867839992046356
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,8,255,0.010728000104427338
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,16,255,0.010388799756765366
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,64,255,0.010489600151777268
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,32,1,1,511,0.02072319984436035
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,32,255,0.010542400181293488
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,4,511,0.01661760061979294
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,2,511,0.014688000082969666
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,8,511,0.014584000408649444
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,16,511,0.013225600123405457
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,32,511,0.013911999762058258
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,64,511,0.014502400159835815
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,32,1,1,511,0.018705600500106813
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,4,511,0.014451199769973755
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,8,511,0.0125231996178627
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,16,511,0.1584288001060486
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,32,511,0.012439999729394913
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,64,511,0.012415999919176102
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,32,1,1,1023,0.022784000635147093
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,2,1023,0.026958400011062623
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,4,1023,0.020024000108242034
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,16,1023,0.014913600683212281
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,8,1023,0.016704000532627106
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,32,1023,0.016630400717258454
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,32,1023,0.012495999783277511
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,64,1023,0.016655999422073364
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,32,1,1,1023,0.020735999941825865
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,2,1023,0.01857919991016388
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,2,2047,0.026913601160049438
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,4,1023,0.016547200083732606
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,8,1023,0.014556799829006196
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,16,1023,0.012651200592517852
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,64,1023,0.0126351997256279
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,32,1,1,2047,0.031011199951171874
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,4,2047,0.03187040090560913
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,8,2047,0.020732800662517547
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,16,2047,0.020820799469947814
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,32,2047,0.018956799805164338
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,64,2047,0.020556800067424774
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,32,1,1,2047,0.02494879961013794
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,2,2047,0.020785599946975708
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,4,2047,0.02075359970331192
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,8,2047,0.016633599996566772
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,16,2047,0.014596800506114959
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,32,2047,0.014920000731945039
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,64,2047,0.014601600170135499
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,32,1,1,4095,0.049584001302719116
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,2,4095,0.049219200015068056
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,4,4095,0.055508798360824584
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,8,4095,0.0424560010433197
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,16,4095,0.04164159893989563
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,32,4095,0.04232319891452789
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,64,4095,0.042419201135635375
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,32,1,1,4095,0.03507519960403442
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,2,4095,0.02730880081653595
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,4,4095,0.030926400423049928
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,8,4095,0.020720000565052032
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,4,8191,0.09618080258369446
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,16,4095,0.018723200261592864
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,32,4095,0.018887999653816222
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,64,4095,0.01900160014629364
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,32,1,1,8191,0.07855520248413086
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,2,8191,0.07655839920043946
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,8,8191,0.0683456003665924
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,4,8191,0.05035039782524109
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,16,8191,0.06924160122871399
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,32,8191,0.06832000017166137
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,64,8191,0.06793599724769592
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,32,1,1,8191,0.051622402667999265
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,2,8191,0.04528000056743622
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,8,8191,0.03811360001564026
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,16,8191,0.03700959980487824
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,32,8191,0.03650079965591431
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,64,8191,0.037062400579452516
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,16,16383,0.120579195022583
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,32,16383,0.12102240324020386
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,32,1,1,16383,0.14124959707260132
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,2,16383,0.13037760257720948
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,4,16383,0.17253119945526124
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,8,16383,0.12432639598846436
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,64,16383,0.12164479494094849
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,32,1,1,16383,0.0875536024570465
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,2,16383,0.07086719870567322
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,4,16383,0.08614879846572876
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,8,16383,0.0613647997379303
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,16,16383,0.05953760147094726
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,32,16383,0.05941759943962097
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,2,32767,0.240830397605896
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,64,16383,0.05977920293807983
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,8,32767,0.23036320209503175
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,32,1,1,32767,0.26947360038757323
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,4,32767,0.32731521129608154
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,16,32767,0.22658720016479492
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,32,32767,0.22689599990844728
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,64,32767,0.2271872043609619
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,32,1,1,32767,0.16214239597320557
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,2,32767,0.12344000339508057
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,4,32767,0.15616480112075806
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,8,32767,0.10578399896621704
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,16,32767,0.1048367977142334
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,32,32767,0.10485600233078003
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,64,32767,0.10513919591903687
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,32,1,1,65535,0.5206655979156494
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,32,1,2,65535,0.4697199821472168
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,32,1,8,65535,0.43853440284729006
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,32,1,4,65535,0.6400320053100585
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,32,1,16,65535,0.43895840644836426
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,32,1,1,65535,0.31393280029296877
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,32,1,32,65535,0.4357744216918945
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,32,1,64,65535,0.4355760097503662
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,32,1,2,65535,0.2334656000137329
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,32,1,4,65535,0.29855520725250245
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,32,1,8,65535,0.19632320404052733
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,32,1,16,65535,0.19429919719696045
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,64,1,1,1,0.016547200083732606
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,32,1,32,65535,0.19477759599685668
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,2,1,0.014295999705791474
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,32,1,64,65535,0.19467999935150146
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,4,1,0.01454080045223236
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,8,1,0.01252799928188324
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,4,1,0.01255200058221817
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,16,1,0.012508800625801087
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,32,1,0.012481600046157837
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,64,1,0.012619200348854064
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,64,1,1,1,0.01656000018119812
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,2,1,0.012548799812793731
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,4,3,0.01454080045223236
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,8,1,0.012459199875593185
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,16,1,0.010390400141477584
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,32,1,0.011897599697113037
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,64,1,0.01048159971833229
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,64,1,1,3,0.016540800034999848
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,2,3,0.014571200311183929
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,8,3,0.012555199861526489
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,16,3,0.012545600533485413
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,8,3,0.011407999694347382
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,32,3,0.012478400021791458
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,64,3,0.012425599992275238
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,64,1,1,3,0.016524800658226015
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,2,3,0.01255040019750595
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,4,3,0.012558400630950928
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,16,3,0.010556799918413162
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,32,3,0.010569600015878677
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,64,3,0.010582400113344192
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,64,1,1,7,0.016542400419712066
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,2,7,0.014454400539398194
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,2,7,0.012652799487113953
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,4,7,0.014555199444293976
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,4,7,0.012484800070524216
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,8,7,0.012638400495052337
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,16,7,0.012518399953842163
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,32,7,0.012675200402736665
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,64,7,0.012521600723266602
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,64,1,1,7,0.016542400419712066
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,8,7,0.01247360035777092
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,16,7,0.010608000308275222
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,32,7,0.010529600083827972
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,64,7,0.010475199669599533
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,64,1,1,15,0.016545599699020384
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,2,15,0.014555199444293976
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,4,15,0.01462240070104599
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,8,15,0.012600000202655792
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,16,15,0.012627199292182922
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,32,15,0.012462399899959564
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,64,15,0.01242239996790886
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,64,1,1,15,0.016518400609493257
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,2,15,0.012529599666595458
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,4,15,0.012809599936008453
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,8,15,0.01247360035777092
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,16,15,0.010521599650382995
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,64,15,0.010516799986362457
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,32,15,0.010502400249242783
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,64,31,0.0126351997256279
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,64,1,1,31,0.016678400337696075
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,2,31,0.014567999541759491
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,4,31,0.012465599924325943
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,4,31,0.014679999649524688
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,8,31,0.013687999546527862
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,16,31,0.012647999823093415
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,32,31,0.01255040019750595
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,64,31,0.01079839989542961
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,64,1,1,31,0.016582399606704712
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,2,31,0.012465599924325943
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,8,31,0.012544000148773193
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,16,31,0.010422399640083313
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,32,31,0.01061599999666214
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,64,1,1,63,0.016607999801635742
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,2,63,0.014556799829006196
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,4,63,0.01451520025730133
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,8,63,0.013780799508094788
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,4,63,0.01252799928188324
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,16,63,0.012583999335765839
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,32,63,0.012432000041007996
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,64,63,0.01252640038728714
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,64,1,1,63,0.016630400717258454
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,2,63,0.012571200728416443
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,8,63,0.01247519999742508
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,4,127,0.014614400267601014
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,16,63,0.010521599650382995
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,32,63,0.010583999752998351
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,64,63,0.010556799918413162
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,64,1,1,127,0.016641600430011748
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,2,127,0.014531199634075165
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,4,127,0.012532800436019897
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,8,127,0.012656000256538392
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,16,127,0.012481600046157837
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,32,127,0.012470400333404541
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,64,127,0.0126351997256279
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,64,1,1,127,0.016649599373340606
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,2,127,0.012438400089740754
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,8,127,0.012510399520397186
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,16,127,0.01067200005054474
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,32,127,0.010603199899196624
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,64,127,0.01048159971833229
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,64,1,1,255,0.0170864000916481
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,2,255,0.019014400243759156
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,4,255,0.014596800506114959
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,8,255,0.012628799676895142
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,16,255,0.01252640038728714
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,32,255,0.012580800056457519
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,64,255,0.012555199861526489
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,64,1,1,255,0.01671359986066818
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,2,255,0.018697600066661834
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,8,255,0.01101600006222725
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,16,255,0.010603199899196624
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,4,255,0.01252799928188324
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,32,255,0.010552000254392624
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,32,511,0.014672000706195832
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,64,255,0.010649599879980088
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,64,511,0.01456640064716339
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,64,1,1,511,0.018670399487018586
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,64,1,1,511,0.020692799985408784
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,2,511,0.02266719937324524
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,4,511,0.01860959976911545
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,8,511,0.016627199947834015
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,16,511,0.01563200056552887
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,2,511,0.020294399559497835
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,4,511,0.014510400593280792
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,8,511,0.014588800072669984
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,16,511,0.012598399817943574
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,32,511,0.012409599870443344
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,64,511,0.012636800110340119
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,64,1,1,1023,0.02887519896030426
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,2,1023,0.029425600171089174
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,4,1023,0.03169119954109192
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,8,1023,0.020664000511169435
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,16,1023,0.01876319944858551
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,32,1023,0.018692800402641298
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,64,1023,0.018667200207710268
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,64,1,1,1023,0.02280000001192093
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,2,1023,0.02290080040693283
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,4,1023,0.018801599740982056
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,8,1023,0.016577599942684172
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,16,1023,0.014684799313545226
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,8,2047,0.04492959976196289
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,32,1023,0.014564800262451171
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,32,2047,0.04321599900722504
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,64,1023,0.014707200229167938
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,64,1,1,2047,0.047095999121665955
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,2,2047,0.051374399662017824
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,4,2047,0.053972798585891726
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,16,2047,0.043249601125717164
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,64,2047,0.041684800386428834
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,64,1,1,2047,0.03300960063934326
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,2,2047,0.029827201366424562
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,4,2047,0.02945919930934906
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,8,2047,0.020795199275016784
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,16,2047,0.01870400011539459
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,32,2047,0.018777599930763243
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,64,2047,0.01865759938955307
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,64,1,1,4095,0.07676960229873657
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,2,4095,0.0780463993549347
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,4,4095,0.09370719790458679
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,8,4095,0.07114560008049012
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,16,4095,0.06899840235710145
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,32,4095,0.069268798828125
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,16,4095,0.03701280057430267
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,64,4095,0.07017760276794434
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,64,1,1,4095,0.04970720112323761
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,2,4095,0.047284799814224246
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,4,4095,0.050012797117233276
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,8,4095,0.03959679901599884
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,32,4095,0.03707039952278137
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,64,4095,0.03669120073318481
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,64,1,1,8191,0.13951200246810913
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,2,8191,0.1315232038497925
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,4,8191,0.17060480117797852
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,8,8191,0.1246351957321167
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,16,8191,0.12319999933242798
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,32,8191,0.12118719816207886
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,64,8191,0.12116479873657227
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,64,1,1,8191,0.08524960279464722
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,2,8191,0.07260800004005433
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,4,8191,0.0858847975730896
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,8,8191,0.06169120073318481
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,16,8191,0.059734398126602174
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,32,8191,0.05953599810600281
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,64,8191,0.05955680012702942
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,64,1,1,16383,0.2697664022445679
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,2,16383,0.24324638843536378
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,4,16383,0.326200008392334
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,8,16383,0.23086559772491455
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,16,16383,0.230019211769104
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,32,16383,0.22897920608520508
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,8,16383,0.10756800174713135
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,64,16383,0.22726719379425048
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,64,1,1,16383,0.15815680027008056
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,2,16383,0.12587039470672606
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,4,16383,0.15618079900741577
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,16,16383,0.10562720298767089
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,32,16383,0.10477759838104247
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,64,16383,0.10510239601135254
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,64,1,1,32767,0.5214032173156739
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,64,1,2,32767,0.4688896179199219
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,64,1,4,32767,0.6371183872222901
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,64,1,8,32767,0.4392831802368164
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,64,1,32,32767,0.4367551803588867
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,64,1,16,32767,0.4388448238372803
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,64,1,64,32767,0.438043212890625
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,64,1,1,32767,0.30504798889160156
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,64,1,2,32767,0.2350640058517456
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,64,1,16,32767,0.19483040571212767
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,64,1,4,32767,0.2957855939865112
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,64,1,8,32767,0.19680000543594361
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,64,1,32,32767,0.19352799654006958
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,128,1,1,1,0.022862400114536285
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,2,1,0.01656000018119812
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,64,1,64,32767,0.19457600116729737
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,4,1,0.020681600272655486
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,8,1,0.014572800695896148
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,4,1,0.017875200510025023
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,16,1,0.012671999633312225
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,32,1,0.012622399628162384
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,64,1,0.012673600018024445
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,128,1,1,1,0.022681599855422972
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,2,3,0.016740800440311433
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,2,1,0.016582399606704712
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,8,1,0.01265919953584671
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,16,1,0.012414400279521943
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,32,1,0.012328000366687774
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,64,1,0.010995200276374817
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,128,1,1,3,0.022771200537681578
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,4,3,0.020956799387931824
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,8,3,0.014567999541759491
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,16,3,0.012656000256538392
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,32,3,0.01313599944114685
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,64,3,0.01358720064163208
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,128,1,1,3,0.022918400168418885
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,2,3,0.016553600132465363
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,4,3,0.01725119948387146
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,8,3,0.012455999851226807
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,16,3,0.012375999987125397
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,32,3,0.010487999767065048
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,32,7,0.01448799967765808
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,64,3,0.011097600311040878
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,64,7,0.013435199856758118
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,128,1,1,7,0.02280319929122925
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,2,7,0.017268800735473634
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,4,7,0.020628799498081208
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,8,7,0.014585599303245544
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,16,7,0.013227200508117676
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,128,1,1,7,0.0227743998169899
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,2,7,0.016625599563121797
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,4,7,0.01669600009918213
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,8,7,0.012639999389648438
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,16,7,0.01268640011548996
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,32,7,0.012479999661445617
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,64,7,0.010608000308275222
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,128,1,1,15,0.02267040014266968
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,2,15,0.016631999611854555
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,4,15,0.02075359970331192
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,8,15,0.014547200500965118
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,16,15,0.014534400403499603
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,32,15,0.014577600359916686
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,64,15,0.014480000734329224
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,128,1,1,15,0.022806400060653688
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,2,15,0.016616000235080718
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,4,15,0.017057600617408752
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,2,31,0.016808000206947327
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,8,15,0.012559999525547028
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,16,15,0.012457600235939026
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,32,15,0.012520000338554382
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,64,15,0.010737600177526474
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,128,1,1,31,0.022516800463199614
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,4,31,0.020713600516319274
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,8,31,0.014604799449443817
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,16,31,0.01417119950056076
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,32,31,0.013950400054454803
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,64,31,0.014504000544548035
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,128,1,1,31,0.022961600124835967
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,2,31,0.016624000668525696
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,4,31,0.01701440066099167
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,8,31,0.012561599910259246
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,16,31,0.01138240024447441
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,32,31,0.010782399773597717
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,64,31,0.012563200294971466
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,128,1,1,63,0.022806400060653688
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,2,63,0.016728000342845918
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,4,63,0.020878399908542632
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,8,63,0.014897599816322327
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,16,63,0.014302399754524232
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,32,63,0.014507199823856353
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,64,63,0.014588800072669984
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,128,1,1,63,0.02280319929122925
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,2,63,0.01653600037097931
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,4,63,0.016612799465656282
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,8,63,0.01266240030527115
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,16,63,0.011764799803495407
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,32,63,0.012390399724245072
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,64,63,0.012427199631929398
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,128,1,1,127,0.02279040068387985
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,2,127,0.016607999801635742
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,4,127,0.020788800716400147
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,8,127,0.01462240070104599
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,16,127,0.014571200311183929
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,32,127,0.014511999487876893
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,32,127,0.0124208003282547
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,64,127,0.014574399590492249
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,128,1,1,255,0.02821120023727417
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,128,1,1,127,0.02285439968109131
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,2,255,0.019083200395107268
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,2,127,0.016603200137615202
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,4,127,0.017211200296878816
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,8,127,0.012665599584579468
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,16,127,0.012200000137090683
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,64,127,0.012441600114107132
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,4,255,0.020742399990558623
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,8,255,0.01454080045223236
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,16,255,0.01459999978542328
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,32,255,0.014611199498176575
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,64,255,0.0144896000623703
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,128,1,1,255,0.0247856006026268
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,2,255,0.018716800212860107
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,4,255,0.017153599858283998
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,8,255,0.012465599924325943
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,16,255,0.012611199915409089
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,32,255,0.011675199866294861
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,64,255,0.012428800016641617
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,128,1,1,511,0.038649600744247434
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,2,511,0.028915199637413024
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,4,511,0.033011201024055484
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,8,511,0.020694400370121
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,16,511,0.01809599995613098
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,32,511,0.018641600012779237
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,64,511,0.01867839992046356
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,128,1,1,511,0.030748799443244934
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,2,511,0.020737600326538087
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,4,511,0.021961599588394165
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,8,511,0.014523200690746307
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,16,511,0.013463999330997466
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,32,511,0.012505599856376648
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,64,511,0.013867199420928955
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,128,1,1,1023,0.053495997190475465
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,2,1023,0.050307202339172366
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,128,1,1,1023,0.04108000099658966
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,4,1023,0.05632320046424866
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,8,1023,0.04341599941253662
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,16,1023,0.041022399067878725
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,32,1023,0.04107840061187744
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,64,1023,0.04071039855480194
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,2,1023,0.028937599062919615
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,4,1023,0.03218240141868591
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,16,1023,0.018412800133228303
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,8,1023,0.01971839964389801
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,32,1023,0.018030400574207305
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,64,1023,0.016625599563121797
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,32,2047,0.06569600105285645
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,128,1,1,2047,0.08460000157356262
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,2,2047,0.07711840271949769
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,4,2047,0.09649440050125122
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,8,2047,0.0697983980178833
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,16,2047,0.06898239850997925
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,64,2047,0.06758880019187927
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,128,1,1,2047,0.056403201818466184
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,2,2047,0.047147199511528015
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,4,2047,0.05257760286331177
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,8,2047,0.03796800076961517
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,2,4095,0.1298624038696289
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,16,2047,0.03688479959964752
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,32,2047,0.03601439893245697
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,64,2047,0.035873600840568544
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,128,1,1,4095,0.14817440509796143
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,4,4095,0.1747056007385254
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,8,4095,0.12339839935302735
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,16,4095,0.12075200080871581
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,32,4095,0.11996480226516723
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,64,4095,0.12008639574050903
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,128,1,1,4095,0.09069759845733642
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,2,4095,0.07191200256347656
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,4,4095,0.08913919925689698
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,8,4095,0.061470401287078855
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,16,4095,0.059566402435302736
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,32,4095,0.05947200059890747
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,2,8191,0.241811203956604
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,64,4095,0.05829280018806458
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,4,8191,0.32748639583587646
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,128,1,1,8191,0.2739151954650879
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,8,8191,0.23262081146240235
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,16,8191,0.22959680557250978
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,32,8191,0.22531518936157227
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,64,8191,0.22595360279083251
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,2,8191,0.12544959783554077
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,16,8191,0.10481760501861573
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,128,1,1,8191,0.16651040315628052
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,4,8191,0.15851999521255494
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,8,8191,0.10655679702758789
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,32,8191,0.10353599786758423
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,64,8191,0.10395200252532959
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,128,1,2,16383,0.46795201301574707
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,128,1,1,16383,0.5318704128265381
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,128,1,8,16383,0.44242401123046876
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,128,1,4,16383,0.6379280090332031
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,128,1,16,16383,0.4373007774353027
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,128,1,32,16383,0.4356383800506592
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,128,1,64,16383,0.4368544101715088
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,128,1,1,16383,0.31260321140289304
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,128,1,2,16383,0.23554561138153077
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,128,1,8,16383,0.1971232056617737
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,128,1,4,16383,0.3000463962554932
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,128,1,16,16383,0.19447679519653321
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,2,1,0.02563839852809906
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,256,1,1,1,0.040513598918914796
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,128,1,32,16383,0.19415839910507202
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,128,1,64,16383,0.19393759965896606
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,4,1,0.03293119966983795
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,8,1,0.02075839936733246
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,16,1,0.018787199258804323
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,32,1,0.01889120042324066
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,64,1,0.0186271995306015
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,256,1,1,1,0.038140800595283506
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,2,1,0.024873599410057068
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,4,1,0.02687999904155731
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,8,1,0.018694399297237395
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,16,1,0.014795200526714325
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,32,1,0.01520639955997467
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,64,1,0.016582399606704712
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,256,1,1,3,0.040956801176071166
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,2,3,0.024817599356174468
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,4,3,0.03236159980297089
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,8,3,0.020929600298404693
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,16,3,0.01931840032339096
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,32,3,0.018670399487018586
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,64,3,0.018628799915313722
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,256,1,1,3,0.038606399297714235
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,2,3,0.02476159930229187
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,4,3,0.026940798759460448
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,8,3,0.01682240068912506
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,16,3,0.01666239947080612
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,32,3,0.016038399934768677
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,64,3,0.014932799339294433
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,256,1,1,7,0.04102559983730316
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,2,7,0.025753599405288697
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,4,7,0.03281440138816834
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,4,7,0.02683520019054413
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,8,7,0.02080959975719452
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,16,7,0.01865600049495697
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,32,7,0.019508799910545348
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,64,7,0.018969599902629853
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,256,1,1,7,0.03733600080013275
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,2,7,0.02476159930229187
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,8,7,0.017654399573802947
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,16,7,0.01666560024023056
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,16,15,0.01871519982814789
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,32,7,0.016209599375724793
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,64,7,0.014884799718856812
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,256,1,1,15,0.04121919870376587
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,2,15,0.024872000515460967
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,4,15,0.03294239938259125
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,8,15,0.020684799551963805
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,32,15,0.02036159932613373
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,64,15,0.01871040016412735
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,256,1,1,15,0.0389519989490509
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,2,15,0.024750399589538574
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,4,15,0.026894399523735048
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,8,15,0.01818400025367737
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,16,15,0.01592160016298294
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,32,15,0.015081599354743958
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,16,31,0.019577600061893463
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,64,15,0.01624000072479248
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,256,1,1,31,0.041068801283836366
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,2,31,0.025953599810600282
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,4,31,0.03308799862861633
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,8,31,0.02075359970331192
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,32,31,0.019713599979877473
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,64,31,0.01876319944858551
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,256,1,1,31,0.03835679888725281
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,2,31,0.024736000597476958
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,4,31,0.02685759961605072
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,8,31,0.01725279986858368
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,16,31,0.016672000288963318
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,32,31,0.01451520025730133
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,64,31,0.014720000326633453
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,256,1,1,63,0.04146080017089844
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,2,63,0.026161599159240722
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,4,63,0.033257600665092465
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,8,63,0.020735999941825865
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,16,63,0.020321600139141083
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,32,63,0.01916320025920868
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,64,63,0.018668800592422485
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,256,1,1,63,0.03912000060081482
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,2,63,0.024806399643421174
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,4,63,0.026972800493240356
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,8,63,0.01679999977350235
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,16,63,0.01656000018119812
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,32,63,0.014617599546909332
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,64,63,0.01488800048828125
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,256,1,1,127,0.041203200817108154
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,2,127,0.02699680030345917
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,4,127,0.03311040103435516
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,8,127,0.02171040028333664
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,16,127,0.018806399405002595
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,32,127,0.019678400456905366
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,64,127,0.018972800672054292
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,256,1,1,127,0.0390639990568161
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,2,127,0.02489439994096756
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,4,127,0.02680160105228424
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,8,127,0.018688000738620758
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,16,127,0.01663679927587509
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,32,127,0.014531199634075165
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,64,127,0.016254399716854096
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,256,1,1,255,0.0489984005689621
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,2,255,0.03911519944667816
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,4,255,0.0394351989030838
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,8,255,0.022327999770641326
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,16,255,0.019952000677585603
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,32,255,0.019679999351501463
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,64,255,0.019392000138759614
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,256,1,1,255,0.04378400146961212
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,2,255,0.029025599360466003
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,4,255,0.026958400011062623
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,8,255,0.018598400056362152
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,16,255,0.015617600083351136
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,32,255,0.015744000673294067
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,64,255,0.01581760048866272
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,256,1,1,511,0.06392800211906433
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,2,511,0.05495200157165527
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,4,511,0.06240479946136475
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,8,511,0.044947201013565065
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,16,511,0.04148800075054169
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,32,511,0.041438400745391846
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,64,511,0.04091359972953797
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,256,1,1,511,0.05292959809303284
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,2,511,0.04079520106315613
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,4,511,0.03802399933338165
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,8,511,0.022833600640296936
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,16,511,0.02064639925956726
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,32,511,0.020712000131607056
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,64,511,0.019908800721168518
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,256,1,1,1023,0.0947808027267456
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,2,1023,0.08460000157356262
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,4,1023,0.10171040296554565
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,8,1023,0.07129600048065185
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,16,1023,0.06956319808959961
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,32,1023,0.06771680116653442
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,64,1023,0.06727679967880248
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,32,1023,0.03698880076408386
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,256,1,1,1023,0.06846079826354981
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,2,1023,0.054179197549819945
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,4,1023,0.05771039724349976
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,8,1023,0.03957920074462891
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,16,1023,0.03708159923553467
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,64,1023,0.03676480054855347
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,256,1,1,2047,0.1600383996963501
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,2,2047,0.1392192006111145
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,4,2047,0.1799456000328064
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,8,2047,0.12389119863510131
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,16,2047,0.12105599641799927
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,32,2047,0.12022080421447753
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,64,2047,0.12029600143432617
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,256,1,1,2047,0.10152959823608398
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,2,2047,0.08340640068054199
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,4,2047,0.0931280016899109
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,8,2047,0.06404320001602173
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,16,2047,0.06160640120506287
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,32,2047,0.06135680079460144
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,64,2047,0.06034719944000244
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,256,1,1,4095,0.28987998962402345
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,16,4095,0.22521119117736815
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,2,4095,0.2503376007080078
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,4,4095,0.3317744016647339
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,8,4095,0.229369592666626
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,32,4095,0.224401593208313
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,64,4095,0.22448480129241943
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,256,1,1,4095,0.17810879945755004
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,2,4095,0.135534405708313
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,4,4095,0.1643280029296875
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,8,4095,0.11030880212783814
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,16,4095,0.10753920078277587
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,32,4095,0.10647679567337036
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,64,4095,0.10612640380859376
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,256,1,1,8191,0.548744010925293
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,256,1,2,8191,0.4762159824371338
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,256,1,8,8191,0.43991518020629883
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,256,1,4,8191,0.6370431900024414
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,256,1,16,8191,0.4366015911102295
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,256,1,32,8191,0.43095197677612307
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,256,1,2,8191,0.24606080055236818
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,256,1,1,8191,0.321230411529541
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,256,1,64,8191,0.4331791877746582
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,256,1,8,8191,0.2005295991897583
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,256,1,4,8191,0.30685598850250245
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,256,1,16,8191,0.1967743992805481
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,512,1,1,1,0.07763680219650268
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,256,1,32,8191,0.19723360538482665
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,2,1,0.045659199357032776
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,256,1,64,8191,0.1961359977722168
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,4,1,0.05352479815483093
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,8,1,0.03314560055732727
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,16,1,0.02887519896030426
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,32,1,0.031041601300239564
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,64,1,0.02892799973487854
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,512,1,1,1,0.06987360119819641
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,4,1,0.04329279959201813
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,2,1,0.0412304013967514
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,8,1,0.026870399713516235
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,16,1,0.022676800191402436
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,32,1,0.024084800481796266
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,64,1,0.022672000527381896
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,512,1,1,3,0.07200480103492737
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,2,3,0.04581600129604339
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,512,1,1,3,0.06960480213165283
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,4,3,0.053283202648162845
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,8,3,0.03306080102920532
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,16,3,0.02969760000705719
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,32,3,0.028832000494003297
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,64,3,0.028867200016975403
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,2,3,0.04126879870891571
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,4,3,0.04327679872512817
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,8,3,0.026977598667144775
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,16,3,0.022678400576114654
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,32,3,0.022940799593925476
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,64,3,0.022756800055503845
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,512,1,1,7,0.07123039960861206
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,2,7,0.047019198536872864
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,4,7,0.05491840243339539
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,8,7,0.03295679986476898
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,16,7,0.02972320020198822
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,32,7,0.02964319884777069
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,64,7,0.029174399375915528
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,512,1,1,7,0.06986079812049865
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,2,7,0.041116800904273984
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,4,7,0.04324159920215607
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,8,7,0.027088001370429993
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,16,7,0.022767999768257143
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,32,7,0.02269120067358017
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,64,7,0.022673599421977997
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,512,1,1,15,0.0717087984085083
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,2,15,0.047465598583221434
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,512,1,1,15,0.0699184000492096
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,4,15,0.05416640043258667
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,8,15,0.033292800188064575
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,16,15,0.030862399935722352
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,32,15,0.02277279943227768
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,32,15,0.028910401463508605
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,64,15,0.030878400802612303
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,2,15,0.04169760048389435
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,4,15,0.044409599900245664
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,8,15,0.026876801252365114
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,16,15,0.022836799919605254
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,64,15,0.02276480048894882
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,512,1,1,31,0.07117279767990112
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,2,31,0.047353601455688475
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,4,31,0.05504639744758606
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,8,31,0.03491680026054382
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,16,31,0.031150400638580322
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,32,31,0.02961600124835968
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,64,31,0.02958880066871643
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,512,1,1,31,0.06991040110588073
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,2,31,0.04298079907894135
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,4,31,0.043705600500106814
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,8,31,0.026894399523735048
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,16,31,0.022785599529743194
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,32,31,0.022734400629997254
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,64,31,0.022353599965572356
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,512,1,1,63,0.07082239985466003
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,2,63,0.04807040095329285
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,4,63,0.05743359923362732
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,2,63,0.043438398838043214
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,8,63,0.03328480124473572
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,16,63,0.030561599135398864
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,32,63,0.030910399556159974
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,64,63,0.031062400341033934
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,512,1,1,63,0.06984159946441651
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,4,63,0.04405440092086792
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,512,1,1,127,0.071288001537323
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,8,63,0.026876801252365114
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,16,63,0.022920000553131103
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,32,63,0.022758400440216063
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,64,63,0.022801600396633148
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,2,127,0.05130879878997803
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,4,127,0.061161601543426515
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,8,127,0.037243199348449704
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,16,127,0.029734399914741517
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,32,127,0.02956480085849762
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,64,127,0.031064000725746155
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,512,1,1,127,0.0704096019268036
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,2,127,0.04526239931583405
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,4,127,0.045484799146652224
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,512,1,1,255,0.08585759997367859
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,8,127,0.028527998924255372
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,16,127,0.022888000309467315
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,32,127,0.022804799675941467
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,64,127,0.02276960015296936
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,2,255,0.06614720225334167
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,4,255,0.06580960154533386
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,8,255,0.049728000164031984
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,16,255,0.04336479902267456
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,32,255,0.04293760061264038
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,64,255,0.041131201386451724
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,512,1,1,255,0.07414079904556274
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,2,255,0.05365440249443054
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,4,255,0.04886719882488251
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,8,255,0.02945440113544464
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,2,511,0.09642720222473145
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,16,255,0.02292799949645996
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,32,255,0.02290239930152893
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,32,511,0.07059680223464966
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,64,255,0.022703999280929567
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,512,1,1,511,0.11337920427322387
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,4,511,0.10310080051422119
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,4,511,0.0680624008178711
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,8,511,0.07760159969329834
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,16,511,0.07221760153770447
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,64,511,0.06931999921798707
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,512,1,1,511,0.0879311978816986
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,2,511,0.06675040125846862
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,8,511,0.0446368008852005
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,16,511,0.03973920047283173
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,32,511,0.039087998867034915
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,64,511,0.03837760090827942
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,512,1,1,1023,0.1756991982460022
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,2,1023,0.15117440223693848
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,4,1023,0.17374880313873292
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,8,1023,0.12936160564422608
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,16,1023,0.12087359428405761
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,8,1023,0.0687936007976532
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,32,1023,0.12112640142440796
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,64,1023,0.12173600196838379
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,512,1,1,1023,0.11705600023269654
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,2,1023,0.09462400078773499
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,4,1023,0.09888799786567688
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,16,1023,0.06378560066223145
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,32,1023,0.061564797163009645
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,64,1023,0.061475199460983274
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,512,1,1,2047,0.3020047903060913
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,2,2047,0.26541121006011964
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,32,2047,0.22476959228515625
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,4,2047,0.3162192106246948
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,8,2047,0.23327839374542236
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,16,2047,0.22717599868774413
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,64,2047,0.22368159294128417
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,512,1,1,2047,0.1904415965080261
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,2,2047,0.1500656008720398
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,4,2047,0.16272640228271484
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,8,2047,0.11696159839630127
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,16,2047,0.11074399948120117
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,32,2047,0.1090127944946289
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,64,2047,0.1089743971824646
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,512,1,2,4095,0.49766879081726073
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,512,1,1,4095,0.5542223930358887
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,512,1,8,4095,0.44478559494018555
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,512,1,4,4095,0.5951536178588868
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,512,1,16,4095,0.43501601219177244
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,512,1,32,4095,0.43258237838745117
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,512,1,64,4095,0.43320798873901367
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,512,1,2,4095,0.26454079151153564
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,512,1,1,4095,0.3362704038619995
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,512,1,8,4095,0.2092655897140503
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,512,1,16,4095,0.20298240184783936
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,512,1,32,4095,0.20082879066467285
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,512,1,4,4095,0.30650079250335693
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1024,1,1,1,0.12792320251464845
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1024,1,2,1,0.08223839998245239
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,512,1,64,4095,0.20043520927429198
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1024,1,4,1,0.10293279886245728
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1024,1,8,1,0.05458719730377197
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,4,1,0.08083999752998353
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1024,1,16,1,0.04941279888153076
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1024,1,32,1,0.047804799675941465
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1024,1,64,1,0.0506991982460022
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1024,1,1,1,0.127185595035553
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,2,1,0.0760047972202301
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,8,1,0.045454400777816775
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,16,1,0.03707360029220581
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1024,1,4,3,0.10180000066757203
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1024,1,8,3,0.055593597888946536
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,32,1,0.03550080060958862
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1024,1,16,3,0.049326398968696596
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,64,1,0.034999999403953555
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1024,1,1,3,0.12894879579544066
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1024,1,2,3,0.08221759796142578
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1024,1,32,3,0.04736160039901734
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1024,1,64,3,0.04886879920959473
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1024,1,1,3,0.12742400169372559
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,2,3,0.07594720125198365
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,4,3,0.07889119982719421
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,8,3,0.045372799038887024
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,16,3,0.03718239963054657
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,32,3,0.03504799902439117
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,64,3,0.03507519960403442
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1024,1,1,7,0.12915040254592897
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1024,1,2,7,0.08217440247535705
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1024,1,4,7,0.10177919864654542
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1024,1,8,7,0.0555728018283844
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1024,1,16,7,0.049316799640655516
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1024,1,32,7,0.048502400517463684
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1024,1,64,7,0.04880160093307495
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1024,1,1,7,0.12727839946746827
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,2,7,0.07596480250358581
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,4,7,0.07980960011482238
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,8,7,0.04531520009040833
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,16,7,0.03710399866104126
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1024,1,4,15,0.10294239521026612
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,32,7,0.03535679876804352
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,64,7,0.03503519892692566
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1024,1,1,15,0.12887680530548096
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1024,1,64,15,0.04857600033283234
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1024,1,2,15,0.0821183979511261
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1024,1,8,15,0.05616800189018249
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1024,1,16,15,0.049414399266242984
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1024,1,32,15,0.049297600984573364
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1024,1,1,15,0.1273136019706726
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,2,15,0.07660800218582153
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,4,15,0.08051040172576904
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,8,15,0.04527519941329956
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,16,15,0.037222400307655334
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,32,15,0.03508000075817108
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,64,15,0.03516319990158081
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1024,1,1,31,0.12975360155105592
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1024,1,2,31,0.08220160007476807
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1024,1,8,31,0.05821120142936707
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1024,1,4,31,0.10468480587005616
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1024,1,16,31,0.04968479871749878
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1024,1,32,31,0.048614400625228885
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1024,1,64,31,0.04838719964027405
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1024,1,1,31,0.12721760272979737
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,2,31,0.07678400278091431
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,4,31,0.08255839943885804
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,8,31,0.045363199710845944
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,16,31,0.03705919981002807
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,32,31,0.03500480055809021
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,64,31,0.035071998834609985
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1024,1,64,63,0.04840799868106842
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1024,1,1,63,0.12883360385894777
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1024,1,2,63,0.08246240019798279
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1024,1,4,63,0.1064687967300415
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1024,1,8,63,0.06187999844551086
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1024,1,16,63,0.05149120092391968
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1024,1,32,63,0.04939360022544861
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1024,1,1,63,0.1272207975387573
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,2,63,0.07765759825706482
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,4,63,0.08289120197296143
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,8,63,0.045465600490570066
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,16,63,0.03738879859447479
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,32,63,0.03607200086116791
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,64,63,0.03515360057353974
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1024,1,1,127,0.12693599462509156
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1024,1,2,127,0.08808479905128479
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1024,1,4,127,0.10884319543838501
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1024,1,8,127,0.06804320216178894
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1024,1,16,127,0.05740799903869629
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1024,1,32,127,0.055534398555755614
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1024,1,64,127,0.05552160143852234
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,2,127,0.07806079983711242
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,64,127,0.035304000973701476
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1024,1,1,127,0.12726720571517944
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,4,127,0.08339999914169312
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,8,127,0.050032001733779904
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,16,127,0.03916960060596466
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,32,127,0.035183998942375186
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1024,1,1,255,0.15650880336761475
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1024,1,2,255,0.11501439809799194
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1024,1,4,255,0.1179487943649292
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1024,1,8,255,0.08103520274162293
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1024,1,16,255,0.07042400240898132
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1024,1,32,255,0.06975359916687011
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1024,1,64,255,0.06732640266418458
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1024,1,1,255,0.134280002117157
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,2,255,0.09059680104255677
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,4,255,0.08397279977798462
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,8,255,0.054611200094223024
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,16,255,0.0433023989200592
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,32,255,0.04075360000133514
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,64,255,0.039961600303649904
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1024,1,1,511,0.2152048110961914
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1024,1,2,511,0.1662511944770813
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1024,1,4,511,0.19261280298233033
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1024,1,8,511,0.13152480125427246
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1024,1,16,511,0.1223647952079773
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1024,1,32,511,0.11951520442962646
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1024,1,64,511,0.1201200008392334
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1024,1,1,511,0.16202880144119264
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,2,511,0.11245599985122681
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,4,511,0.13248319625854493
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,8,511,0.07836480140686035
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,16,511,0.0658079981803894
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,32,511,0.06230720281600952
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,64,511,0.061582398414611814
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1024,1,1,1023,0.34232640266418457
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1024,1,2,1023,0.27503039836883547
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1024,1,4,1023,0.33730878829956057
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1024,1,8,1023,0.22834560871124268
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1024,1,16,1023,0.21782081127166747
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1024,1,32,1023,0.21521599292755128
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1024,1,64,1023,0.2131472110748291
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1024,1,1,1023,0.23041920661926268
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,2,1023,0.16735199689865113
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,4,1023,0.1852560043334961
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,8,1023,0.12133439779281616
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,16,1023,0.1093600034713745
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,32,1023,0.10656800270080566
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,64,1023,0.1050495982170105
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,128,1024,1,1,2047,0.5939551830291748
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,64,1024,1,2,2047,0.4911695957183838
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,16,1024,1,8,2047,0.4263599872589111
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,32,1024,1,4,2047,0.6210368156433106
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,8,1024,1,16,2047,0.413267183303833
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,4,1024,1,32,2047,0.41083202362060545
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,float16,2,1024,1,64,2047,0.4076511859893799
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,128,1024,1,1,2047,0.3816864013671875
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,64,1024,1,2,2047,0.27837119102478025
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,16,1024,1,8,2047,0.20953121185302734
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,32,1024,1,4,2047,0.31696319580078125
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,8,1024,1,16,2047,0.19863519668579102
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,4,1024,1,32,2047,0.19440000057220458
SGLang,0.5.6.post2,NVIDIA B200,mla_generation,trtllm_mla,float16,fp8,2,1024,1,64,2047,0.19251199960708618
