framework,version,device,op_name,kernel_source,batch_size,isl,num_heads,num_key_value_heads,head_dim,beam_width,attn_dtype,kv_cache_dtype,step,latency
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,64,128,1,float16,float16,1,0.05552639961242676
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,64,128,1,float16,fp8,1,0.03922719955444336
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,64,128,1,float16,float16,3,0.05575199723243714
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,64,128,1,float16,fp8,3,0.03922879993915558
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,64,128,1,float16,float16,7,0.05589119791984558
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,64,128,1,float16,fp8,7,0.03938080072402954
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,64,128,1,float16,float16,15,0.05550240278244019
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,64,128,1,float16,fp8,15,0.03920640051364899
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,64,128,1,float16,float16,31,0.057999998331069946
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,64,128,1,float16,fp8,31,0.03930239975452423
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,64,128,1,float16,fp8,127,0.0406143993139267
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,64,128,1,float16,float16,511,0.16844799518585205
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,64,128,1,float16,fp8,1023,0.16029759645462036
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,64,128,1,float16,float16,2047,0.6127664089202881
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,64,128,1,float16,fp8,2047,0.3124608039855957
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,64,128,1,float16,float16,1,0.017214399576187134
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,64,128,1,float16,float16,63,0.0614031970500946
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,64,128,1,float16,float16,255,0.09644160270690919
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,64,128,1,float16,float16,3,0.018108800053596497
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,64,128,1,float16,float16,7,0.01866080015897751
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,64,128,1,float16,fp8,1,0.008491200208663941
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,64,128,1,float16,float16,1023,0.3126336097717285
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,64,128,1,float16,float16,15,0.018467199802398682
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,64,128,1,float16,fp8,15,0.00840959995985031
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,64,128,1,float16,float16,127,0.0653551995754242
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,64,128,1,float16,fp8,31,0.008451200276613235
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,64,128,1,float16,fp8,63,0.00846880003809929
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,64,128,1,float16,float16,127,0.01680160015821457
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,64,128,1,float16,float16,31,0.016808000206947327
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,64,128,1,float16,float16,63,0.018241600692272188
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,64,128,1,float16,fp8,127,0.008436799794435502
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,64,128,1,float16,float16,511,0.017560000717639922
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,64,128,1,float16,float16,255,0.016867199540138246
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,64,128,1,float16,fp8,511,0.008427199721336365
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,64,128,1,float16,fp8,1023,0.010308799892663955
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,64,128,1,float16,float16,2047,0.02084160000085831
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,64,128,1,float16,float16,1023,0.018636800348758698
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,64,128,1,float16,fp8,2047,0.010467199981212616
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,64,128,1,float16,fp8,1,0.00844319984316826
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,64,128,1,float16,float16,3,0.01669279932975769
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,64,128,1,float16,float16,1,0.016499200463294984
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,64,128,1,float16,fp8,3,0.008352000266313553
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,64,128,1,float16,fp8,7,0.008476799726486206
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,64,128,1,float16,float16,7,0.016652800142765045
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,64,128,1,float16,float16,15,0.01658560037612915
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,64,128,1,float16,float16,31,0.016739200055599212
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,64,128,1,float16,fp8,31,0.00846880003809929
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,64,128,1,float16,float16,63,0.01669120043516159
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,64,128,1,float16,fp8,63,0.008463999629020691
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,64,128,1,float16,float16,127,0.017151999473571777
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,64,128,1,float16,fp8,127,0.008449599891901017
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,64,128,1,float16,float16,255,0.017340800166130065
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,64,128,1,float16,fp8,255,0.00843520015478134
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,64,128,1,float16,float16,511,0.01870400011539459
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,64,128,1,float16,fp8,511,0.008452799916267396
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,64,128,1,float16,float16,1023,0.02077440023422241
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,64,128,1,float16,fp8,1023,0.01045759990811348
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,64,128,1,float16,float16,2047,0.04013760089874267
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,64,128,1,float16,fp8,2047,0.012547199428081513
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,64,128,1,float16,float16,1,0.0371071994304657
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,64,128,1,float16,fp8,63,0.03911679983139038
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,64,128,1,float16,fp8,1,0.0230320006608963
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,64,128,1,float16,fp8,3,0.022860799729824067
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,64,128,1,float16,fp8,511,0.08446080088615418
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,64,128,1,float16,float16,7,0.03709760010242462
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,64,128,1,float16,float16,3,0.03549120128154755
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,64,128,1,float16,float16,15,0.037099200487136844
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,64,128,1,float16,fp8,3,0.008524800091981888
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,64,128,1,float16,float16,31,0.03683359920978546
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,64,128,1,float16,fp8,7,0.022804799675941467
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,64,128,1,float16,fp8,15,0.022888000309467315
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,64,128,1,float16,float16,63,0.036852800846099855
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,64,128,1,float16,fp8,7,0.00844319984316826
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,64,128,1,float16,fp8,31,0.022868800163269042
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,64,128,1,float16,fp8,127,0.022779199481010436
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,64,128,1,float16,fp8,63,0.022945599257946016
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,64,128,1,float16,fp8,255,0.04780640006065369
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,64,128,1,float16,float16,255,0.06004160046577454
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,64,128,1,float16,float16,127,0.0432671993970871
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,64,128,1,float16,float16,511,0.09611679911613465
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,64,128,1,float16,fp8,255,0.02884320020675659
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,64,128,1,float16,fp8,511,0.047228801250457766
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,64,128,1,float16,fp8,1023,0.08503519892692565
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,64,128,1,float16,fp8,255,0.008479999750852585
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,64,128,1,float16,float16,1023,0.1691856026649475
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,64,128,1,float16,float16,1,0.018667200207710268
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,64,128,1,float16,fp8,2047,0.16196000576019287
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,64,128,1,float16,fp8,3,0.008475200086832047
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,64,128,1,float16,fp8,1,0.008627200126647949
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,64,128,1,float16,float16,15,0.018745599687099455
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,64,128,1,float16,float16,2047,0.31491520404815676
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,64,128,1,float16,fp8,31,0.008390399813652038
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,64,128,1,float16,float16,3,0.018688000738620758
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,64,128,1,float16,float16,7,0.018769599497318268
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,64,128,1,float16,fp8,7,0.008753599971532822
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,64,128,1,float16,fp8,15,0.008369600027799606
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,64,128,1,float16,float16,31,0.018670399487018586
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,64,128,1,float16,fp8,63,0.008539199829101562
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,64,128,1,float16,float16,63,0.018607999384403228
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,64,128,1,float16,float16,127,0.018692800402641298
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,64,128,1,float16,fp8,127,0.008560000360012055
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,64,128,1,float16,float16,255,0.01870879977941513
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,64,128,1,float16,fp8,511,0.010467199981212616
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,64,128,1,float16,fp8,255,0.00838399976491928
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,64,128,1,float16,float16,511,0.02094080001115799
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,64,128,1,float16,float16,1023,0.041119998693466185
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,64,128,1,float16,float16,2047,0.059673601388931276
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,64,128,1,float16,fp8,15,0.008382400125265121
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,64,128,1,float16,fp8,1023,0.013278399407863618
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,64,128,1,float16,fp8,2047,0.028867200016975403
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,64,64,128,1,float16,float16,1,0.09871839880943298
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,64,64,128,1,float16,float16,7,0.09867200255393982
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,64,64,128,1,float16,fp8,1,0.07192320227622986
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,64,64,128,1,float16,float16,3,0.09878559708595276
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,64,64,128,1,float16,fp8,3,0.07191680073738098
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,64,64,128,1,float16,float16,15,0.10533759593963624
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,64,64,128,1,float16,fp8,7,0.07185919880867005
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,64,64,128,1,float16,fp8,31,0.07191839814186096
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,64,64,128,1,float16,float16,127,0.11281280517578125
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,64,64,128,1,float16,float16,31,0.10472160577774048
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,64,64,128,1,float16,fp8,15,0.07188159823417664
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,64,64,128,1,float16,float16,63,0.10606559514999389
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,64,64,128,1,float16,fp8,63,0.07222880125045776
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,64,64,128,1,float16,fp8,255,0.08653600215911865
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,64,64,128,1,float16,fp8,127,0.07192959785461425
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,64,64,128,1,float16,float16,255,0.17666720151901244
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,64,64,128,1,float16,float16,511,0.31963679790496824
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,64,64,128,1,float16,float16,1,0.18066240549087526
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,64,64,128,1,float16,fp8,511,0.159660804271698
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,64,64,128,1,float16,fp8,3,0.13544800281524658
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,64,64,128,1,float16,fp8,1,0.1353808045387268
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,64,64,128,1,float16,float16,7,0.18896160125732422
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,64,64,128,1,float16,float16,3,0.18089439868927001
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,64,64,128,1,float16,float16,1023,0.6076576232910156
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,64,64,128,1,float16,float16,15,0.1891711950302124
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,64,64,128,1,float16,float16,31,0.1895151972770691
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,64,64,128,1,float16,fp8,7,0.135315203666687
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,64,64,128,1,float16,fp8,1023,0.30956320762634276
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,64,64,128,1,float16,fp8,15,0.13546719551086425
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,64,64,128,1,float16,fp8,31,0.1359120011329651
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,64,64,128,1,float16,float16,127,0.20192480087280273
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,64,64,128,1,float16,fp8,63,0.13573440313339233
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,64,64,128,1,float16,float16,63,0.1907904028892517
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,64,64,128,1,float16,fp8,127,0.13530880212783813
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,64,64,128,1,float16,fp8,255,0.16500320434570312
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,64,64,128,1,float16,float16,255,0.33139359951019287
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,64,64,128,1,float16,float16,3,0.36106081008911134
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,64,64,128,1,float16,float16,1,0.34949278831481934
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,64,64,128,1,float16,fp8,1,0.262391996383667
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,64,64,128,1,float16,fp8,3,0.2626415967941284
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,64,64,128,1,float16,float16,15,0.36091039180755613
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,64,64,128,1,float16,float16,7,0.36176960468292235
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,64,64,128,1,float16,fp8,7,0.26440160274505614
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,64,64,128,1,float16,float16,31,0.36157760620117185
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,64,64,128,1,float16,fp8,15,0.2629328012466431
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,64,64,128,1,float16,fp8,31,0.26311678886413575
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,64,64,128,1,float16,fp8,63,0.26323680877685546
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,64,64,128,1,float16,float16,63,0.3621887922286987
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,64,128,1,float16,fp8,1,0.010543999820947647
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,64,64,128,1,float16,float16,127,0.3865200042724609
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,64,128,1,float16,float16,1,0.02167679965496063
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,64,128,1,float16,float16,3,0.020843200385570526
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,64,128,1,float16,fp8,15,0.010547199845314026
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,64,64,128,1,float16,fp8,127,0.26106240749359133
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,64,128,1,float16,fp8,3,0.01050880029797554
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,64,128,1,float16,float16,15,0.020827199518680572
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,64,128,1,float16,float16,7,0.021087999641895293
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,64,128,1,float16,fp8,7,0.010425599664449692
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,64,128,1,float16,fp8,31,0.010531199723482132
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,64,128,1,float16,float16,63,0.022812800109386445
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,64,128,1,float16,float16,31,0.020803199708461763
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,64,128,1,float16,fp8,127,0.010507199913263321
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,64,128,1,float16,fp8,63,0.01096320003271103
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,64,128,1,float16,float16,127,0.022198399901390074
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,64,128,1,float16,fp8,255,0.010603199899196624
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,64,128,1,float16,float16,511,0.04110400080680847
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,64,128,1,float16,float16,255,0.020670400559902193
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,64,128,1,float16,fp8,511,0.014499199390411378
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,64,128,1,float16,float16,2047,0.09807680249214172
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,64,128,1,float16,fp8,1023,0.02887519896030426
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,64,128,1,float16,float16,1023,0.05963839888572693
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,64,128,1,float16,fp8,2047,0.047356799244880676
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,64,64,128,1,float16,float16,1,0.7001455783843994
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,64,64,128,1,float16,fp8,1,0.5191472053527832
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,64,64,128,1,float16,fp8,7,0.5192463874816895
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,64,64,128,1,float16,float16,7,0.7006415843963623
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,64,64,128,1,float16,float16,3,0.7004064083099365
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,64,64,128,1,float16,fp8,3,0.519871997833252
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,64,64,128,1,float16,float16,15,0.7010799884796143
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,64,64,128,1,float16,float16,31,0.7006415843963623
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,64,64,128,1,float16,fp8,31,0.518232011795044
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,64,64,128,1,float16,fp8,15,0.5185247898101807
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,64,64,128,1,float16,fp8,63,0.5176303863525391
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,64,64,128,1,float16,float16,63,0.7031343936920166
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,64,64,128,1,float16,fp8,3,1.0309871673583983
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,64,64,128,1,float16,fp8,1,1.0312399864196777
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,64,64,128,1,float16,float16,3,1.3819439888000489
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,64,64,128,1,float16,fp8,7,1.0298640251159668
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,64,64,128,1,float16,float16,1,1.3809295654296876
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,64,64,128,1,float16,float16,7,1.3803183555603027
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,64,128,1,float16,float16,1,0.026921600103378296
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,64,128,1,float16,fp8,1,0.014655999839305878
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,64,64,128,1,float16,float16,15,1.3810208320617676
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,64,128,1,float16,fp8,3,0.01459999978542328
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,64,128,1,float16,float16,3,0.026766398549079896
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,64,128,1,float16,float16,7,0.026892799139022826
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,64,128,1,float16,fp8,7,0.014603200554847717
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,64,128,1,float16,float16,15,0.026819199323654175
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,64,64,128,1,float16,fp8,15,1.029047966003418
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,64,128,1,float16,float16,31,0.02680160105228424
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,64,64,128,1,float16,float16,31,1.3812031745910645
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,64,128,1,float16,fp8,63,0.014731200039386749
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,64,128,1,float16,fp8,31,0.014614400267601014
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,64,128,1,float16,fp8,15,0.014537599682807923
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,64,128,1,float16,fp8,255,0.014905600249767304
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,64,64,128,1,float16,fp8,31,1.0278047561645507
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,64,128,1,float16,float16,63,0.026862400770187377
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,64,128,1,float16,float16,127,0.0268528014421463
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,64,128,1,float16,fp8,127,0.014574399590492249
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,64,128,1,float16,float16,255,0.04114400148391724
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,64,128,1,float16,fp8,511,0.028960001468658448
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,64,128,1,float16,float16,511,0.059569597244262695
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,64,128,1,float16,fp8,1023,0.04821760058403015
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,64,128,1,float16,float16,1023,0.09630560278892517
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,64,128,1,float16,float16,2047,0.16945600509643555
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,64,128,1,float16,fp8,2047,0.08627359867095948
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,48,48,128,1,float16,float16,1,0.045263999700546266
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,48,48,128,1,float16,fp8,1,0.030876800417900085
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,48,48,128,1,float16,fp8,3,0.030985599756240843
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,48,48,128,1,float16,float16,3,0.04535999894142151
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,48,48,128,1,float16,fp8,7,0.0308896005153656
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,48,48,128,1,float16,float16,7,0.045342400670051575
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,48,48,128,1,float16,float16,15,0.04534080028533936
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,48,48,128,1,float16,fp8,15,0.03089280128479004
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,48,48,128,1,float16,float16,31,0.045296001434326175
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,48,48,128,1,float16,fp8,31,0.030953601002693176
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,48,48,128,1,float16,float16,63,0.05139679908752441
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,48,48,128,1,float16,fp8,63,0.031079998612403868
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,48,48,128,1,float16,fp8,127,0.03185920119285583
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,48,48,128,1,float16,float16,255,0.07796000242233277
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,48,48,128,1,float16,fp8,511,0.06572960019111633
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,48,48,128,1,float16,float16,127,0.05351679921150208
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,48,48,128,1,float16,fp8,255,0.038708800077438356
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,48,48,128,1,float16,float16,511,0.1323024034500122
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,48,48,128,1,float16,float16,1023,0.24003999233245848
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,48,48,128,1,float16,fp8,1023,0.12258239984512329
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,48,48,128,1,float16,float16,1,0.01873439997434616
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,48,48,128,1,float16,float16,3,0.018612800538539885
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,48,48,128,1,float16,float16,2047,0.4562511920928955
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,48,48,128,1,float16,float16,7,0.018775999546051025
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,48,48,128,1,float16,fp8,2047,0.2361135959625244
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,48,48,128,1,float16,float16,15,0.01870400011539459
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,48,48,128,1,float16,float16,31,0.018756799399852753
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,48,48,128,1,float16,float16,63,0.01764640063047409
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,48,48,128,1,float16,float16,127,0.018697600066661834
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,48,48,128,1,float16,float16,255,0.018671999871730804
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,48,48,128,1,float16,fp8,1,0.008643200248479843
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,48,48,128,1,float16,float16,511,0.018612800538539885
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,48,48,128,1,float16,fp8,7,0.00857279971241951
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,48,48,128,1,float16,fp8,3,0.008481600135564805
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,48,48,128,1,float16,fp8,511,0.010526400059461594
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,48,48,128,1,float16,float16,1023,0.020689600706100465
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,48,48,128,1,float16,fp8,1023,0.010532800108194351
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,48,48,128,1,float16,float16,2047,0.02069759964942932
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,48,48,128,1,float16,fp8,2047,0.010699199885129929
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,48,48,128,1,float16,float16,1,0.017323200404644013
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,48,48,128,1,float16,fp8,31,0.008476799726486206
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,48,48,128,1,float16,fp8,3,0.008473599702119828
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,48,48,128,1,float16,fp8,1,0.007390400022268295
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,48,48,128,1,float16,fp8,63,0.008483199775218964
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,48,48,128,1,float16,float16,3,0.016739200055599212
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,48,48,128,1,float16,fp8,127,0.008473599702119828
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,48,48,128,1,float16,fp8,15,0.008460800349712371
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,48,48,128,1,float16,float16,7,0.016596800088882445
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,48,48,128,1,float16,fp8,7,0.008238399773836136
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,48,48,128,1,float16,fp8,255,0.008404800295829773
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,48,48,128,1,float16,float16,15,0.017019200325012206
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,48,48,128,1,float16,fp8,15,0.007859200239181519
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,48,48,128,1,float16,float16,31,0.016628800332546233
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,48,48,128,1,float16,fp8,31,0.007769600301980972
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,48,48,128,1,float16,float16,63,0.01786399930715561
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,48,48,128,1,float16,fp8,63,0.007627200335264206
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,48,48,128,1,float16,float16,127,0.01671359986066818
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,48,48,128,1,float16,fp8,127,0.008355200290679932
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,48,48,128,1,float16,fp8,1023,0.01040479987859726
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,48,48,128,1,float16,float16,255,0.01664479970932007
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,48,48,128,1,float16,fp8,255,0.008350399881601333
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,48,48,128,1,float16,float16,511,0.018585599958896637
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,48,48,128,1,float16,fp8,511,0.00835840031504631
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,48,48,128,1,float16,float16,1023,0.020716799795627593
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,48,48,128,1,float16,float16,2047,0.030985599756240843
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,48,48,128,1,float16,fp8,2047,0.0124719999730587
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,48,48,128,1,float16,float16,1,0.03129119873046875
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,48,48,128,1,float16,fp8,1,0.01911199986934662
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,48,48,128,1,float16,float16,3,0.031491199135780336
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,48,48,128,1,float16,fp8,3,0.018673600256443025
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,48,48,128,1,float16,float16,7,0.030976000428199767
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,48,48,128,1,float16,fp8,7,0.01923519968986511
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,48,48,128,1,float16,float16,15,0.03151040077209473
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,48,48,128,1,float16,fp8,15,0.018617600202560425
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,48,48,128,1,float16,float16,31,0.030955201387405394
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,48,48,128,1,float16,fp8,31,0.019526399672031403
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,48,48,128,1,float16,float16,63,0.03130080103874207
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,48,48,128,1,float16,fp8,63,0.018982400000095368
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,48,48,128,1,float16,float16,127,0.03699040114879608
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,48,48,128,1,float16,fp8,127,0.01868640035390854
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,48,48,128,1,float16,float16,255,0.05124160051345825
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,48,48,128,1,float16,float16,1023,0.13269280195236205
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,48,48,128,1,float16,fp8,255,0.020721599459648132
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,48,48,128,1,float16,float16,511,0.07816479802131653
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,48,48,128,1,float16,fp8,511,0.038468798995018004
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,48,48,128,1,float16,fp8,1023,0.06654400229454041
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,48,48,128,1,float16,float16,1,0.018571199476718904
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,48,48,128,1,float16,float16,2047,0.23999838829040526
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,48,48,128,1,float16,fp8,1,0.008452799916267396
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,48,48,128,1,float16,fp8,2047,0.12262719869613647
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,48,48,128,1,float16,float16,3,0.018580800294876097
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,48,48,128,1,float16,fp8,3,0.008372800052165985
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,48,48,128,1,float16,float16,7,0.01858240067958832
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,48,48,128,1,float16,float16,63,0.01855680048465729
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,48,48,128,1,float16,fp8,7,0.008432000130414962
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,48,48,128,1,float16,fp8,63,0.008470399677753449
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,48,48,128,1,float16,float16,15,0.01867839992046356
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,48,48,128,1,float16,fp8,15,0.008380799740552902
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,48,48,128,1,float16,float16,31,0.018588800728321076
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,48,48,128,1,float16,fp8,31,0.008348800241947174
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,48,48,128,1,float16,float16,127,0.018614399433135986
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,48,48,128,1,float16,fp8,127,0.008507200330495835
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,48,48,128,1,float16,fp8,1023,0.012561599910259246
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,48,48,128,1,float16,float16,255,0.018699200451374055
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,48,48,128,1,float16,fp8,255,0.00835840031504631
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,48,48,128,1,float16,float16,511,0.02072799950838089
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,48,48,128,1,float16,fp8,511,0.010387200117111205
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,48,48,128,1,float16,float16,1023,0.03148959875106812
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,48,48,128,1,float16,float16,2047,0.05335360169410706
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,48,48,128,1,float16,fp8,2047,0.019915199279785155
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,48,48,128,1,float16,float16,1,0.07809759974479676
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,48,48,128,1,float16,fp8,1,0.05548319816589355
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,48,48,128,1,float16,float16,3,0.07824959754943847
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,48,48,128,1,float16,fp8,3,0.0555184006690979
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,48,48,128,1,float16,float16,7,0.07826560139656066
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,48,48,128,1,float16,fp8,7,0.05548319816589355
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,48,48,128,1,float16,fp8,15,0.05560160279273987
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,48,48,128,1,float16,float16,31,0.08606560230255127
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,48,48,128,1,float16,float16,15,0.07868160009384155
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,48,48,128,1,float16,fp8,31,0.05570719838142395
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,48,48,128,1,float16,float16,63,0.08633440136909484
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,48,48,128,1,float16,float16,127,0.09028800129890442
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,48,48,128,1,float16,fp8,63,0.05604640245437622
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,48,48,128,1,float16,float16,255,0.13846399784088134
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,48,48,128,1,float16,fp8,255,0.06867039799690247
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,48,48,128,1,float16,fp8,127,0.05630559921264648
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,48,48,128,1,float16,fp8,511,0.12198560237884522
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,48,48,128,1,float16,float16,511,0.24725279808044434
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,48,48,128,1,float16,fp8,1,0.10288000106811523
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,48,48,128,1,float16,float16,1,0.13971519470214844
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,48,48,128,1,float16,float16,3,0.1406399965286255
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,48,48,128,1,float16,fp8,1023,0.23506720066070558
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,48,48,128,1,float16,float16,7,0.14450080394744874
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,48,48,128,1,float16,float16,1023,0.46238079071044924
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,48,48,128,1,float16,fp8,3,0.10304640531539917
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,48,48,128,1,float16,fp8,7,0.10308480262756348
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,48,48,128,1,float16,float16,15,0.14788639545440674
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,48,48,128,1,float16,float16,31,0.14975039958953856
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,48,48,128,1,float16,fp8,31,0.10462080240249634
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,48,48,128,1,float16,fp8,15,0.10316159725189208
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,48,48,128,1,float16,float16,63,0.14918240308761596
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,48,48,128,1,float16,fp8,255,0.12692960500717163
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,48,48,128,1,float16,float16,127,0.15985920429229736
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,48,48,128,1,float16,fp8,127,0.10304160118103027
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,48,48,128,1,float16,fp8,63,0.10470880270004272
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,48,48,128,1,float16,float16,255,0.25597119331359863
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,48,48,128,1,float16,float16,1,0.2641024112701416
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,48,48,128,1,float16,fp8,1,0.1988576054573059
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,48,48,128,1,float16,float16,3,0.2726480007171631
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,48,48,128,1,float16,float16,7,0.2767312049865723
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,48,48,128,1,float16,fp8,7,0.198852801322937
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,48,48,128,1,float16,fp8,3,0.1993407964706421
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,48,48,128,1,float16,float16,15,0.2786240100860596
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,48,48,128,1,float16,fp8,15,0.20013439655303955
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,48,48,128,1,float16,float16,31,0.2768143892288208
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,48,48,128,1,float16,fp8,31,0.19905120134353638
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,48,48,128,1,float16,float16,63,0.2772687911987305
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,48,48,128,1,float16,fp8,63,0.19906560182571412
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,48,48,128,1,float16,float16,1,0.01982239931821823
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,48,48,128,1,float16,fp8,1,0.010564800351858139
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,48,48,128,1,float16,float16,127,0.29449119567871096
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,48,48,128,1,float16,float16,3,0.02072480022907257
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,48,48,128,1,float16,fp8,3,0.010478399693965912
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,48,48,128,1,float16,float16,31,0.020694400370121
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,48,48,128,1,float16,fp8,127,0.19889919757843016
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,48,48,128,1,float16,float16,7,0.020715199410915375
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,48,48,128,1,float16,fp8,7,0.010388799756765366
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,48,48,128,1,float16,float16,15,0.020734399557113647
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,48,48,128,1,float16,fp8,15,0.010446400195360184
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,48,48,128,1,float16,fp8,31,0.010414399951696397
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,48,48,128,1,float16,float16,63,0.02011680006980896
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,48,48,128,1,float16,fp8,63,0.010424000024795533
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,48,48,128,1,float16,float16,127,0.020638400316238405
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,48,48,128,1,float16,fp8,127,0.010409600287675857
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,48,48,128,1,float16,float16,255,0.02062560021877289
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,48,48,128,1,float16,fp8,255,0.010364799946546554
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,48,48,128,1,float16,float16,511,0.030959999561309813
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,48,48,128,1,float16,fp8,511,0.012468799948692322
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,48,48,128,1,float16,float16,1023,0.05090879797935486
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,48,48,128,1,float16,fp8,1023,0.018598400056362152
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,48,48,128,1,float16,fp8,1,0.3898544073104858
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,48,48,128,1,float16,float16,2047,0.07768639922142029
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,48,48,128,1,float16,fp8,2047,0.03743039965629578
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,48,48,128,1,float16,float16,1,0.5304560184478759
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,48,48,128,1,float16,float16,3,0.5302768230438233
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,48,48,128,1,float16,float16,7,0.5305503845214844
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,48,48,128,1,float16,fp8,3,0.39147520065307617
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,48,48,128,1,float16,fp8,7,0.39141600131988524
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,48,48,128,1,float16,float16,15,0.5306896209716797
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,48,48,128,1,float16,fp8,15,0.3911407947540283
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,48,48,128,1,float16,fp8,31,0.38976800441741943
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,48,48,128,1,float16,float16,31,0.5308527946472168
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,48,48,128,1,float16,float16,63,0.5312592029571533
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,48,48,128,1,float16,fp8,63,0.39065279960632326
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,48,48,128,1,float16,fp8,1,0.7749711990356445
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,48,48,128,1,float16,float16,1,1.040555191040039
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,48,48,128,1,float16,fp8,3,0.7749040126800537
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,48,48,128,1,float16,float16,3,1.0402031898498536
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,48,48,128,1,float16,float16,1,0.02473440021276474
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,48,48,128,1,float16,fp8,7,0.77467360496521
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,48,48,128,1,float16,float16,7,1.0409071922302247
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,48,48,128,1,float16,fp8,1,0.01268800050020218
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,48,48,128,1,float16,float16,3,0.024723200500011443
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,48,48,128,1,float16,float16,15,1.0407376289367676
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,48,48,128,1,float16,float16,15,0.024825599789619446
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,48,48,128,1,float16,float16,31,1.0402303695678712
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,48,48,128,1,float16,fp8,3,0.01422400027513504
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,48,48,128,1,float16,fp8,15,0.7739615917205811
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,48,48,128,1,float16,float16,7,0.024723200500011443
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,48,48,128,1,float16,fp8,7,0.01271200031042099
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,48,48,128,1,float16,fp8,31,0.7722591876983642
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,48,48,128,1,float16,fp8,15,0.01249760016798973
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,48,48,128,1,float16,float16,31,0.024751999974250795
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,48,48,128,1,float16,fp8,255,0.012476799637079239
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,48,48,128,1,float16,fp8,31,0.01356160044670105
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,48,48,128,1,float16,fp8,63,0.013547199964523315
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,48,48,128,1,float16,float16,127,0.024721600115299225
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,48,48,128,1,float16,float16,63,0.024860799312591553
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,48,48,128,1,float16,fp8,127,0.012590399384498597
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,48,48,128,1,float16,float16,255,0.031779199838638306
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,48,48,128,1,float16,float16,2047,0.13546719551086425
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,48,48,128,1,float16,float16,511,0.05133119821548462
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,48,48,128,1,float16,fp8,511,0.018785600364208222
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,48,48,128,1,float16,float16,1023,0.07901120185852051
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,48,48,128,1,float16,fp8,1023,0.039201599359512326
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,40,40,128,1,float16,float16,1,0.04121119976043701
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,40,40,128,1,float16,fp8,1,0.027539199590682982
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,40,40,128,1,float16,float16,3,0.04114719927310943
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,48,48,128,1,float16,fp8,2047,0.06758880019187927
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,40,40,128,1,float16,fp8,3,0.02680160105228424
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,40,40,128,1,float16,float16,7,0.04107680022716522
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,40,40,128,1,float16,float16,15,0.041223999857902524
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,40,40,128,1,float16,fp8,7,0.02698880136013031
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,40,40,128,1,float16,fp8,15,0.02757599949836731
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,40,40,128,1,float16,float16,31,0.041252800822258
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,40,40,128,1,float16,fp8,31,0.026843199133872987
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,40,40,128,1,float16,fp8,63,0.026846399903297423
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,40,40,128,1,float16,float16,63,0.043300798535346983
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,40,40,128,1,float16,float16,127,0.04930559992790222
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,40,40,128,1,float16,fp8,127,0.0280239999294281
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,40,40,128,1,float16,float16,255,0.06909279823303223
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,40,40,128,1,float16,fp8,255,0.033980798721313474
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,40,40,128,1,float16,float16,511,0.11377919912338257
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,40,40,128,1,float16,fp8,511,0.05721759796142578
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,40,40,128,1,float16,fp8,1023,0.1038159966468811
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,40,40,128,1,float16,float16,1023,0.20409600734710692
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,40,40,128,1,float16,float16,1,0.016680000722408293
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,40,40,128,1,float16,float16,3,0.016663999855518342
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,40,40,128,1,float16,float16,2047,0.3830111980438232
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,40,40,128,1,float16,fp8,2047,0.19896320104599
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,40,40,128,1,float16,float16,7,0.016756799817085267
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,40,40,128,1,float16,float16,15,0.016622400283813475
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,40,40,128,1,float16,float16,31,0.016550399363040924
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,40,40,128,1,float16,float16,63,0.01668799966573715
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,40,40,128,1,float16,float16,127,0.01676799952983856
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,40,40,128,1,float16,float16,255,0.01676799952983856
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,40,40,128,1,float16,fp8,1,0.008441600203514098
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,40,40,128,1,float16,fp8,255,0.008366400003433227
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,40,40,128,1,float16,float16,511,0.016590400040149687
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,40,40,128,1,float16,fp8,511,0.00833439975976944
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,40,40,128,1,float16,fp8,3,0.008462399989366532
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,40,40,128,1,float16,float16,1023,0.018632000684738158
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,40,40,128,1,float16,fp8,7,0.008388800173997879
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,40,40,128,1,float16,fp8,1023,0.008656000345945358
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,40,40,128,1,float16,float16,2047,0.02069759964942932
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,40,40,128,1,float16,fp8,15,0.008401600271463394
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,40,40,128,1,float16,fp8,2047,0.01048159971833229
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,40,40,128,1,float16,float16,1,0.016939200460910797
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,40,40,128,1,float16,fp8,127,0.008347199857234954
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,40,40,128,1,float16,fp8,1,0.006606400012969971
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,40,40,128,1,float16,fp8,31,0.008347199857234954
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,40,40,128,1,float16,fp8,63,0.008499199897050858
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,40,40,128,1,float16,float16,3,0.01666560024023056
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,40,40,128,1,float16,fp8,3,0.008340799808502197
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,40,40,128,1,float16,float16,7,0.016740800440311433
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,40,40,128,1,float16,fp8,7,0.007297600060701371
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,40,40,128,1,float16,fp8,63,0.008312000334262848
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,40,40,128,1,float16,float16,127,0.016857600212097167
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,40,40,128,1,float16,float16,15,0.016859200596809388
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,40,40,128,1,float16,fp8,15,0.006916800141334533
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,40,40,128,1,float16,float16,31,0.016659200191497803
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,40,40,128,1,float16,fp8,31,0.0071263998746871945
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,40,40,128,1,float16,float16,63,0.01661120057106018
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,40,40,128,1,float16,float16,255,0.01703519970178604
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,40,40,128,1,float16,fp8,127,0.008462399989366532
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,40,40,128,1,float16,fp8,255,0.008257599920034409
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,40,40,128,1,float16,float16,511,0.018603199720382692
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,40,40,128,1,float16,fp8,511,0.00835999995470047
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,40,40,128,1,float16,float16,1023,0.018699200451374055
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,40,40,128,1,float16,fp8,1023,0.010395199805498124
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,40,40,128,1,float16,float16,2047,0.025353598594665527
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,40,40,128,1,float16,fp8,2047,0.012647999823093415
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,40,40,128,1,float16,float16,1,0.02901119887828827
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,40,40,128,1,float16,fp8,1,0.016734400391578676
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,40,40,128,1,float16,float16,3,0.02886880040168762
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,40,40,128,1,float16,fp8,3,0.016897599399089813
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,40,40,128,1,float16,fp8,31,0.016579200327396394
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,40,40,128,1,float16,float16,7,0.028939199447631837
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,40,40,128,1,float16,fp8,7,0.01674399971961975
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,40,40,128,1,float16,float16,15,0.028990399837493897
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,40,40,128,1,float16,fp8,15,0.01669120043516159
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,40,40,128,1,float16,float16,31,0.028811201453208923
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,40,40,128,1,float16,float16,63,0.028830400109291075
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,40,40,128,1,float16,fp8,63,0.016633599996566772
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,40,40,128,1,float16,float16,127,0.03102880120277405
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,40,40,128,1,float16,fp8,127,0.016531200706958772
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,40,40,128,1,float16,float16,255,0.045316800475120544
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,40,40,128,1,float16,fp8,255,0.018603199720382692
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,40,40,128,1,float16,float16,511,0.06789119839668274
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,40,40,128,1,float16,fp8,511,0.033456000685691836
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,40,40,128,1,float16,float16,1023,0.11380959749221801
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,40,40,128,1,float16,fp8,1023,0.05704960227012634
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,40,40,128,1,float16,float16,2047,0.20433599948883058
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,40,40,128,1,float16,float16,1,0.017576000094413756
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,40,40,128,1,float16,fp8,2047,0.10427680015563964
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,40,40,128,1,float16,fp8,1,0.008436799794435502
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,40,40,128,1,float16,float16,3,0.01860000044107437
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,40,40,128,1,float16,fp8,3,0.008460800349712371
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,40,40,128,1,float16,float16,63,0.01860159933567047
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,40,40,128,1,float16,float16,7,0.016740800440311433
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,40,40,128,1,float16,fp8,7,0.008350399881601333
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,40,40,128,1,float16,float16,15,0.018598400056362152
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,40,40,128,1,float16,float16,255,0.018632000684738158
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,40,40,128,1,float16,fp8,15,0.00841120034456253
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,40,40,128,1,float16,fp8,255,0.008585599809885025
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,40,40,128,1,float16,float16,511,0.02067680060863495
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,40,40,128,1,float16,float16,31,0.018585599958896637
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,40,40,128,1,float16,fp8,31,0.008833599835634231
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,40,40,128,1,float16,fp8,63,0.008441600203514098
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,40,40,128,1,float16,float16,127,0.017969599366188048
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,40,40,128,1,float16,fp8,127,0.008395200222730636
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,40,40,128,1,float16,fp8,511,0.010435199737548828
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,40,40,128,1,float16,float16,1023,0.024905599653720856
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,40,40,128,1,float16,fp8,1023,0.012643200159072877
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,40,40,128,1,float16,float16,2047,0.05310080051422119
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,40,40,128,1,float16,fp8,2047,0.01871040016412735
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,40,40,128,1,float16,float16,1,0.06576160192489625
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,40,40,128,1,float16,fp8,1,0.047336000204086306
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,40,40,128,1,float16,float16,3,0.06571679711341857
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,40,40,128,1,float16,fp8,3,0.04746719896793365
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,40,40,128,1,float16,float16,7,0.06578400135040283
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,40,40,128,1,float16,fp8,7,0.04734239876270294
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,40,40,128,1,float16,float16,15,0.06609119772911072
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,40,40,128,1,float16,fp8,15,0.047358399629592894
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,40,40,128,1,float16,float16,31,0.0700111985206604
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,40,40,128,1,float16,fp8,31,0.0473471999168396
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,40,40,128,1,float16,float16,63,0.07062559723854064
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,40,40,128,1,float16,float16,127,0.07576640248298645
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,40,40,128,1,float16,fp8,63,0.04769439995288849
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,40,40,128,1,float16,fp8,127,0.04854080080986023
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,40,40,128,1,float16,float16,255,0.11549439430236816
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,40,40,128,1,float16,fp8,255,0.05889120101928711
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,40,40,128,1,float16,float16,511,0.20503840446472169
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,40,40,128,1,float16,fp8,511,0.1033519983291626
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,40,40,128,1,float16,fp8,1,0.08835999965667725
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,40,40,128,1,float16,float16,1023,0.38411359786987304
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,40,40,128,1,float16,fp8,3,0.08802239894866944
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,40,40,128,1,float16,float16,1,0.11890560388565063
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,40,40,128,1,float16,fp8,1023,0.19797120094299317
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,40,40,128,1,float16,float16,7,0.11964319944381714
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,40,40,128,1,float16,float16,3,0.11923359632492066
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,40,40,128,1,float16,fp8,7,0.08795520067214965
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,40,40,128,1,float16,fp8,15,0.08806399703025818
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,40,40,128,1,float16,float16,31,0.12626399993896484
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,40,40,128,1,float16,float16,15,0.12697919607162475
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,40,40,128,1,float16,fp8,31,0.08833280205726624
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,40,40,128,1,float16,fp8,63,0.08837119936943054
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,40,40,128,1,float16,float16,127,0.134225594997406
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,40,40,128,1,float16,float16,63,0.12736159563064575
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,40,40,128,1,float16,fp8,127,0.0883184015750885
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,40,40,128,1,float16,float16,255,0.2145359992980957
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,40,40,128,1,float16,fp8,255,0.10766880512237549
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,40,40,128,1,float16,fp8,1,0.1669551968574524
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,40,40,128,1,float16,float16,3,0.22383520603179932
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,40,40,128,1,float16,float16,1,0.2225264072418213
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,40,40,128,1,float16,fp8,7,0.16687840223312378
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,40,40,128,1,float16,fp8,3,0.16671520471572876
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,40,40,128,1,float16,float16,15,0.2338032007217407
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,40,40,128,1,float16,float16,7,0.2338655948638916
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,40,40,128,1,float16,float16,31,0.2340240001678467
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,40,40,128,1,float16,fp8,15,0.1680624008178711
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,40,40,128,1,float16,float16,63,0.23440959453582763
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,40,40,128,1,float16,fp8,63,0.16814080476760865
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,40,40,128,1,float16,fp8,31,0.16835520267486573
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,40,40,128,1,float16,float16,1,0.020483200252056123
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,40,40,128,1,float16,float16,3,0.01876160055398941
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,40,40,128,1,float16,fp8,3,0.01037440001964569
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,40,40,128,1,float16,float16,127,0.2481247901916504
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,40,40,128,1,float16,fp8,1,0.010494399815797806
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,40,40,128,1,float16,float16,7,0.02072319984436035
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,40,40,128,1,float16,fp8,127,0.16722400188446046
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,40,40,128,1,float16,fp8,7,0.010601600259542465
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,40,40,128,1,float16,float16,15,0.018753600120544434
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,40,40,128,1,float16,float16,127,0.018676799535751343
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,40,40,128,1,float16,fp8,15,0.010756800323724747
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,40,40,128,1,float16,fp8,31,0.010425599664449692
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,40,40,128,1,float16,float16,63,0.020207999646663664
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,40,40,128,1,float16,fp8,63,0.010387200117111205
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,40,40,128,1,float16,float16,31,0.01881439983844757
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,40,40,128,1,float16,fp8,127,0.010441599786281586
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,40,40,128,1,float16,float16,255,0.019121600687503813
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,40,40,128,1,float16,fp8,255,0.010369600355625152
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,40,40,128,1,float16,float16,511,0.026464000344276428
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,40,40,128,1,float16,fp8,511,0.012411200255155564
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,40,40,128,1,float16,float16,1023,0.04803839921951294
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,40,40,128,1,float16,fp8,1023,0.016542400419712066
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,40,40,128,1,float16,fp8,2047,0.03522239923477173
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,40,40,128,1,float16,float16,2047,0.0733568012714386
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,40,40,128,1,float16,fp8,1,0.32618560791015627
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,40,40,128,1,float16,float16,7,0.4466207981109619
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,40,40,128,1,float16,float16,1,0.44057598114013674
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,40,40,128,1,float16,float16,3,0.44633121490478517
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,40,40,128,1,float16,fp8,3,0.3261327981948853
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,40,40,128,1,float16,fp8,7,0.32910881042480467
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,40,40,128,1,float16,fp8,15,0.32767200469970703
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,40,40,128,1,float16,fp8,31,0.32678239345550536
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,40,40,128,1,float16,float16,31,0.4469344139099121
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,40,40,128,1,float16,fp8,63,0.32629120349884033
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,40,40,128,1,float16,float16,15,0.4455999851226807
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,40,40,128,1,float16,float16,63,0.446614408493042
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,40,40,128,1,float16,fp8,1,0.6470335960388184
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,40,40,128,1,float16,float16,1,0.8709152221679688
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,40,40,128,1,float16,fp8,3,0.6471583843231201
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,40,40,128,1,float16,float16,3,0.8707216262817383
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,40,40,128,1,float16,fp8,7,0.6461552143096924
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,40,40,128,1,float16,float16,7,0.8710463523864747
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,40,40,128,1,float16,float16,1,0.022852799296379088
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,40,40,128,1,float16,fp8,1,0.012534399330615998
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,40,40,128,1,float16,float16,3,0.02281759977340698
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,40,40,128,1,float16,fp8,15,0.6456416130065918
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,40,40,128,1,float16,fp8,3,0.01252480000257492
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,40,40,128,1,float16,fp8,15,0.01255040019750595
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,40,40,128,1,float16,float16,7,0.02287680059671402
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,40,40,128,1,float16,float16,31,0.022726400196552275
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,40,40,128,1,float16,fp8,31,0.012521600723266602
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,40,40,128,1,float16,fp8,31,0.6451600074768067
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,40,40,128,1,float16,float16,15,0.8709888458251953
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,40,40,128,1,float16,float16,31,0.8706064224243164
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,40,40,128,1,float16,fp8,7,0.01242400035262108
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,40,40,128,1,float16,float16,15,0.022808000445365906
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,40,40,128,1,float16,float16,63,0.022755199670791627
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,40,40,128,1,float16,fp8,63,0.012511999905109405
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,40,40,128,1,float16,float16,127,0.022888000309467315
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,40,40,128,1,float16,fp8,127,0.01255040019750595
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,40,40,128,1,float16,float16,255,0.0249551996588707
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,40,40,128,1,float16,fp8,255,0.012492799758911132
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,40,40,128,1,float16,float16,511,0.04532159864902496
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,40,40,128,1,float16,fp8,511,0.016630400717258454
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,40,40,128,1,float16,float16,1023,0.06985599994659424
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,40,40,128,1,float16,fp8,1023,0.033022400736808774
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,40,40,128,1,float16,float16,2047,0.11515200138092041
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,40,40,128,1,float16,fp8,2047,0.05744959712028504
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,32,128,1,float16,float16,1,0.03515999913215637
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,32,128,1,float16,fp8,1,0.02276639938354492
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,32,128,1,float16,float16,3,0.03537760078907013
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,32,128,1,float16,fp8,3,0.02268480062484741
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,32,128,1,float16,float16,7,0.03692319989204407
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,32,128,1,float16,fp8,7,0.022801600396633148
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,32,128,1,float16,fp8,15,0.022734400629997254
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,32,128,1,float16,float16,31,0.03662559986114502
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,32,128,1,float16,float16,15,0.03552959859371185
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,32,128,1,float16,fp8,31,0.02279520034790039
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,32,128,1,float16,float16,63,0.036427199840545654
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,32,128,1,float16,fp8,63,0.022886399924755097
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,32,128,1,float16,float16,127,0.043163201212882994
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,32,128,1,float16,fp8,127,0.022811199724674224
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,32,128,1,float16,float16,255,0.059889602661132815
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,32,128,1,float16,fp8,255,0.028905600309371948
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,32,128,1,float16,fp8,511,0.04750080108642578
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,32,128,1,float16,float16,511,0.09654560089111328
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,32,128,1,float16,float16,1023,0.1678928017616272
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,32,128,1,float16,fp8,1023,0.08570079803466797
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,32,128,1,float16,float16,1,0.016638399660587312
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,32,128,1,float16,float16,2047,0.31273279190063474
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,32,128,1,float16,fp8,2047,0.16119680404663086
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,32,128,1,float16,float16,3,0.016543999314308167
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,32,128,1,float16,float16,7,0.01672320067882538
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,32,128,1,float16,fp8,4095,0.3122960090637207
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,32,128,1,float16,float16,4095,0.6021520137786865
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,32,128,1,float16,float16,15,0.016574400663375854
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,32,128,1,float16,float16,31,0.016606399416923524
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,32,128,1,float16,float16,63,0.016638399660587312
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,32,128,1,float16,float16,127,0.01666080057621002
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,32,128,1,float16,fp8,1,0.00841120034456253
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,32,128,1,float16,float16,255,0.016620799899101257
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,32,128,1,float16,fp8,255,0.008452799916267396
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,32,128,1,float16,float16,511,0.016630400717258454
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,32,128,1,float16,fp8,3,0.008377599716186523
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,32,128,1,float16,fp8,7,0.008403199911117553
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,32,128,1,float16,fp8,511,0.008327999711036682
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,32,128,1,float16,float16,1023,0.01865919977426529
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,32,128,1,float16,fp8,15,0.00846719965338707
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,32,128,1,float16,fp8,1023,0.008495999872684479
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,32,128,1,float16,float16,2047,0.01868640035390854
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,32,128,1,float16,fp8,2047,0.010372799634933472
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,32,128,1,float16,fp8,63,0.008343999832868576
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,32,128,1,float16,float16,4095,0.020703999698162077
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,32,128,1,float16,fp8,1,0.008350399881601333
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,32,128,1,float16,fp8,4095,0.010496000200510025
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,32,128,1,float16,fp8,3,0.008371199667453765
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,32,128,1,float16,fp8,7,0.00838399976491928
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,32,128,1,float16,float16,1,0.01727519929409027
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,32,128,1,float16,fp8,127,0.008364800363779068
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,32,128,1,float16,float16,3,0.017046399414539337
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,32,128,1,float16,fp8,31,0.008489599823951722
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,32,128,1,float16,float16,7,0.018718400597572328
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,32,128,1,float16,float16,15,0.018603199720382692
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,32,128,1,float16,fp8,15,0.008515200018882752
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,32,128,1,float16,float16,31,0.018590399622917177
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,32,128,1,float16,fp8,31,0.00840959995985031
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,32,128,1,float16,float16,63,0.01857600063085556
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,32,128,1,float16,fp8,63,0.008444800227880477
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,32,128,1,float16,float16,127,0.01860480010509491
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,32,128,1,float16,fp8,127,0.0083856001496315
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,32,128,1,float16,float16,255,0.018596799671649934
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,32,128,1,float16,fp8,255,0.008479999750852585
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,32,128,1,float16,float16,511,0.016756799817085267
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,32,128,1,float16,fp8,511,0.008460800349712371
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,32,128,1,float16,float16,1023,0.018838399648666383
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,32,128,1,float16,fp8,1023,0.008561599999666214
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,32,128,1,float16,float16,2047,0.020662400126457214
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,32,128,1,float16,fp8,2047,0.01040000021457672
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,32,128,1,float16,float16,4095,0.04086720049381256
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,32,128,1,float16,fp8,4095,0.01266079992055893
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,32,128,1,float16,float16,15,0.026361599564552307
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,32,128,1,float16,float16,1,0.026952001452445983
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,32,128,1,float16,fp8,1,0.014776000380516052
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,32,128,1,float16,float16,3,0.026876801252365114
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,32,128,1,float16,fp8,3,0.014502400159835815
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,32,128,1,float16,float16,7,0.026104000210762025
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,32,128,1,float16,fp8,7,0.01467519998550415
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,32,128,1,float16,fp8,15,0.014606399834156037
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,32,128,1,float16,float16,31,0.027008000016212463
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,32,128,1,float16,fp8,31,0.014655999839305878
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,32,128,1,float16,float16,63,0.02677919864654541
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,32,128,1,float16,fp8,63,0.014619199931621552
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,32,128,1,float16,float16,127,0.026817598938941957
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,32,128,1,float16,fp8,127,0.014620800316333771
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,32,128,1,float16,float16,255,0.04116640090942383
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,32,128,1,float16,fp8,255,0.014575999975204468
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,32,128,1,float16,float16,2047,0.16864960193634032
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,32,128,1,float16,float16,511,0.059617602825164796
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,32,128,1,float16,fp8,511,0.028833600878715514
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,32,128,1,float16,fp8,1,0.008343999832868576
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,32,128,1,float16,float16,1023,0.09610720276832581
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,32,128,1,float16,fp8,1023,0.048582398891448976
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,32,128,1,float16,fp8,2047,0.08618559837341308
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,32,128,1,float16,float16,1,0.01709599941968918
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,32,128,1,float16,float16,4095,0.313590407371521
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,32,128,1,float16,fp8,15,0.008500800281763077
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,32,128,1,float16,fp8,4095,0.1621008038520813
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,32,128,1,float16,float16,3,0.016627199947834015
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,32,128,1,float16,fp8,3,0.008420799672603608
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,32,128,1,float16,float16,7,0.016944000124931337
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,32,128,1,float16,fp8,7,0.008337599784135818
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,32,128,1,float16,float16,15,0.016667200624942778
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,32,128,1,float16,fp8,31,0.008353599905967712
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,32,128,1,float16,float16,31,0.016710400581359863
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,32,128,1,float16,float16,63,0.016603200137615202
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,32,128,1,float16,fp8,63,0.00833439975976944
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,32,128,1,float16,float16,127,0.01671839952468872
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,32,128,1,float16,fp8,127,0.008367999643087386
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,32,128,1,float16,float16,255,0.01664000004529953
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,32,128,1,float16,fp8,255,0.008366400003433227
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,32,128,1,float16,fp8,2047,0.012598399817943574
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,32,128,1,float16,float16,511,0.01871519982814789
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,32,128,1,float16,fp8,511,0.00835840031504631
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,32,128,1,float16,float16,1023,0.02065120041370392
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,32,128,1,float16,fp8,1023,0.010419200360774993
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,32,128,1,float16,float16,2047,0.04007520079612732
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,32,128,1,float16,float16,4095,0.05883839726448059
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,32,128,1,float16,fp8,7,0.039129599928855896
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,32,128,1,float16,fp8,4095,0.026982399821281432
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,32,128,1,float16,float16,1,0.05573599934577942
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,32,128,1,float16,fp8,1,0.03918400108814239
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,32,128,1,float16,float16,3,0.05551999807357788
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,32,128,1,float16,fp8,3,0.039211198687553406
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,32,128,1,float16,float16,7,0.05556640028953552
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,32,128,1,float16,float16,15,0.055687999725341795
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,32,128,1,float16,fp8,15,0.039284801483154295
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,32,128,1,float16,float16,31,0.05954880118370056
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,32,128,1,float16,fp8,31,0.03927200138568878
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,32,128,1,float16,fp8,63,0.03915359973907471
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,32,128,1,float16,float16,127,0.06495680212974549
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,32,128,1,float16,float16,63,0.06159520149230957
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,32,128,1,float16,fp8,127,0.04116640090942383
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,32,128,1,float16,float16,255,0.0969103991985321
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,32,128,1,float16,fp8,255,0.04919039905071258
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,32,128,1,float16,float16,511,0.16929759979248046
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,32,128,1,float16,fp8,511,0.08472800254821777
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,32,128,1,float16,float16,1023,0.3125488042831421
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,32,128,1,float16,float16,1,0.09938880205154418
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,32,128,1,float16,fp8,1,0.07186880111694335
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,32,128,1,float16,fp8,1023,0.16056480407714843
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,32,128,1,float16,float16,3,0.0990831971168518
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,32,128,1,float16,fp8,3,0.07196159958839417
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,32,128,1,float16,fp8,2047,0.3128767967224121
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,32,128,1,float16,float16,2047,0.6012544155120849
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,32,128,1,float16,float16,7,0.09912639856338501
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,32,128,1,float16,fp8,7,0.07183200120925903
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,32,128,1,float16,fp8,15,0.07194560170173644
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,32,128,1,float16,fp8,63,0.07191200256347656
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,32,128,1,float16,float16,31,0.10600479841232299
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,32,128,1,float16,float16,15,0.10429439544677735
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,32,128,1,float16,fp8,31,0.07190399765968322
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,32,128,1,float16,float16,63,0.10701279640197754
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,32,128,1,float16,float16,127,0.11234879493713379
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,32,128,1,float16,fp8,127,0.07192800045013428
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,32,128,1,float16,float16,255,0.1769055962562561
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,32,128,1,float16,fp8,255,0.08980640172958373
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,32,128,1,float16,float16,3,0.1807695984840393
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,32,128,1,float16,float16,1,0.18119839429855347
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,32,128,1,float16,fp8,511,0.15911200046539306
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,32,128,1,float16,fp8,7,0.13540159463882445
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,32,128,1,float16,fp8,1,0.13541439771652222
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,32,128,1,float16,fp8,3,0.13537280559539794
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,32,128,1,float16,float16,511,0.3216336011886597
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,32,128,1,float16,float16,7,0.19060319662094116
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,32,128,1,float16,float16,15,0.1915104031562805
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,32,128,1,float16,fp8,15,0.13564800024032592
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,32,128,1,float16,float16,31,0.19090240001678466
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,32,128,1,float16,fp8,31,0.13547199964523315
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,32,128,1,float16,float16,63,0.19090240001678466
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,32,128,1,float16,fp8,63,0.1355839967727661
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,32,128,1,float16,float16,127,0.2025696039199829
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,32,128,1,float16,float16,3,0.01858240067958832
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,32,128,1,float16,fp8,255,0.16836800575256347
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,32,128,1,float16,fp8,127,0.1356063961982727
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,32,128,1,float16,float16,1,0.018782399594783783
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,32,128,1,float16,fp8,1,0.008489599823951722
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,32,128,1,float16,float16,255,0.3329119920730591
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,32,128,1,float16,float16,7,0.01868479996919632
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,32,128,1,float16,fp8,3,0.008432000130414962
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,32,128,1,float16,fp8,7,0.008401600271463394
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,32,128,1,float16,float16,15,0.01883520036935806
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,32,128,1,float16,fp8,15,0.008561599999666214
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,32,128,1,float16,float16,31,0.018716800212860107
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,32,128,1,float16,fp8,31,0.00843999981880188
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,32,128,1,float16,fp8,63,0.008529599756002426
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,32,128,1,float16,float16,127,0.018756799399852753
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,32,128,1,float16,float16,1023,0.04109599888324737
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,32,128,1,float16,fp8,127,0.008399999886751174
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,32,128,1,float16,fp8,1023,0.012516799569129943
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,32,128,1,float16,float16,63,0.018718400597572328
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,32,128,1,float16,float16,255,0.018606400489807128
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,32,128,1,float16,float16,4095,0.09602879881858825
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,32,128,1,float16,fp8,255,0.008377599716186523
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,32,128,1,float16,float16,511,0.020764799416065217
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,32,128,1,float16,fp8,511,0.010487999767065048
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,32,32,128,1,float16,fp8,1,0.2625744104385376
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,32,128,1,float16,float16,2047,0.05963519811630249
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,32,128,1,float16,fp8,2047,0.028828799724578857
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,32,128,1,float16,fp8,4095,0.04814240038394928
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,32,32,128,1,float16,fp8,7,0.26408801078796384
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,32,32,128,1,float16,float16,1,0.35067999362945557
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,32,32,128,1,float16,float16,3,0.36061758995056153
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,32,32,128,1,float16,fp8,3,0.26232318878173827
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,32,32,128,1,float16,float16,7,0.3613487958908081
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,32,32,128,1,float16,float16,15,0.3614943981170654
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,32,32,128,1,float16,fp8,15,0.26344640254974366
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,32,32,128,1,float16,fp8,31,0.26281440258026123
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,32,32,128,1,float16,float16,31,0.36065599918365476
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,32,32,128,1,float16,float16,63,0.3628335952758789
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,32,32,128,1,float16,fp8,63,0.26255199909210203
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,32,32,128,1,float16,float16,127,0.38266239166259763
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,32,32,128,1,float16,fp8,127,0.2619312047958374
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,32,32,128,1,float16,float16,1,0.7005856037139893
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,32,32,128,1,float16,fp8,1,0.5187136173248291
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,32,32,128,1,float16,fp8,3,0.5186960220336914
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,32,32,128,1,float16,float16,3,0.7012112140655518
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,32,32,128,1,float16,float16,7,0.7017360210418702
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,32,32,128,1,float16,fp8,7,0.5186575889587403
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,32,32,128,1,float16,fp8,15,0.518177604675293
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,32,32,128,1,float16,float16,15,0.7006735801696777
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,32,128,1,float16,float16,1,0.02083040028810501
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,32,32,128,1,float16,fp8,31,0.5181280136108398
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,32,32,128,1,float16,float16,31,0.7011104106903077
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,32,128,1,float16,fp8,1,0.011238399893045425
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,32,128,1,float16,float16,3,0.020857599377632142
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,32,128,1,float16,fp8,7,0.010595200210809707
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,32,128,1,float16,fp8,3,0.010596799850463866
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,32,32,128,1,float16,float16,63,0.7021120071411133
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,32,32,128,1,float16,fp8,63,0.5175824165344238
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,32,128,1,float16,float16,7,0.020852799713611602
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,32,128,1,float16,float16,63,0.021063999831676485
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,32,128,1,float16,float16,15,0.02128320038318634
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,32,128,1,float16,fp8,15,0.010486400127410889
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,32,128,1,float16,float16,31,0.02091040015220642
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,32,128,1,float16,fp8,31,0.010462400317192078
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,32,128,1,float16,fp8,63,0.010441599786281586
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,32,128,1,float16,fp8,127,0.010523200035095215
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,32,128,1,float16,float16,127,0.02067359983921051
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,32,128,1,float16,float16,255,0.020844799280166627
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,32,128,1,float16,fp8,255,0.01050880029797554
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,32,128,1,float16,float16,511,0.041201600432395936
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,32,128,1,float16,fp8,2047,0.04728800058364868
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,32,128,1,float16,fp8,511,0.01451680064201355
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,32,128,1,float16,float16,1023,0.05968800187110901
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,32,128,1,float16,fp8,1023,0.028984001278877257
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,32,128,1,float16,float16,2047,0.09725919961929322
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,24,24,128,1,float16,float16,1,0.03089280128479004
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,32,128,1,float16,float16,4095,0.17190879583358765
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,32,128,1,float16,fp8,4095,0.08542079925537109
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,24,24,128,1,float16,fp8,1,0.018716800212860107
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,24,24,128,1,float16,float16,3,0.030947199463844298
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,24,24,128,1,float16,fp8,3,0.01884319931268692
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,24,24,128,1,float16,fp8,7,0.018787199258804323
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,24,24,128,1,float16,float16,15,0.03089120090007782
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,24,24,128,1,float16,fp8,63,0.01897439956665039
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,24,24,128,1,float16,float16,7,0.030910399556159974
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,24,24,128,1,float16,fp8,15,0.019232000410556793
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,24,24,128,1,float16,float16,31,0.03107840120792389
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,24,24,128,1,float16,float16,63,0.031124800443649292
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,24,24,128,1,float16,fp8,31,0.0186271995306015
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,24,24,128,1,float16,fp8,127,0.018671999871730804
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,24,24,128,1,float16,float16,127,0.036985599994659425
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,24,24,128,1,float16,float16,255,0.05073440074920654
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,24,24,128,1,float16,fp8,255,0.020975999534130096
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,24,24,128,1,float16,float16,511,0.07804960012435913
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,24,24,128,1,float16,fp8,511,0.03867680132389069
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,24,24,128,1,float16,float16,1023,0.13305280208587647
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,24,24,128,1,float16,fp8,1023,0.06614879965782165
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,24,24,128,1,float16,float16,2047,0.24082560539245607
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,24,24,128,1,float16,fp8,2047,0.1240447998046875
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,24,24,128,1,float16,float16,1,0.018611200153827667
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,24,24,128,1,float16,float16,3,0.01748320013284683
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,24,24,128,1,float16,float16,4095,0.45739521980285647
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,24,24,128,1,float16,fp8,4095,0.2373807907104492
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,24,24,128,1,float16,float16,7,0.018676799535751343
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,24,24,128,1,float16,float16,15,0.018596799671649934
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,24,24,128,1,float16,float16,31,0.01719360053539276
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,24,24,128,1,float16,float16,63,0.018638400733470915
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,24,24,128,1,float16,float16,127,0.01870719939470291
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,24,24,128,1,float16,float16,255,0.018619200587272643
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,24,24,128,1,float16,fp8,1,0.008446399867534638
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,24,24,128,1,float16,float16,511,0.01822720021009445
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,24,24,128,1,float16,fp8,3,0.008484800159931184
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,24,24,128,1,float16,fp8,511,0.008511999994516373
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,24,24,128,1,float16,fp8,7,0.008535999804735184
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,24,24,128,1,float16,float16,1023,0.018569600582122803
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,24,24,128,1,float16,fp8,15,0.008675199747085572
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,24,24,128,1,float16,fp8,1023,0.009520000219345093
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,24,24,128,1,float16,float16,2047,0.020747199654579163
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,24,24,128,1,float16,fp8,2047,0.010499200224876404
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,24,24,128,1,float16,fp8,31,0.008656000345945358
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,24,24,128,1,float16,fp8,1,0.008566399663686752
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,24,24,128,1,float16,float16,4095,0.0226160004734993
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,24,24,128,1,float16,fp8,63,0.008531200140714646
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,24,24,128,1,float16,fp8,4095,0.012145599722862244
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,24,24,128,1,float16,float16,1,0.018670399487018586
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,24,24,128,1,float16,float16,3,0.01857600063085556
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,24,24,128,1,float16,fp8,15,0.008558399975299835
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,24,24,128,1,float16,fp8,255,0.008548799902200699
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,24,24,128,1,float16,fp8,3,0.008427199721336365
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,24,24,128,1,float16,fp8,127,0.008660800009965896
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,24,24,128,1,float16,float16,7,0.018695999681949616
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,24,24,128,1,float16,fp8,7,0.00844319984316826
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,24,24,128,1,float16,float16,15,0.018662400543689728
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,24,24,128,1,float16,float16,31,0.01860000044107437
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,24,24,128,1,float16,fp8,31,0.008507200330495835
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,24,24,128,1,float16,float16,63,0.018619200587272643
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,24,24,128,1,float16,fp8,63,0.008540800213813782
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,24,24,128,1,float16,float16,127,0.018702399730682374
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,24,24,128,1,float16,fp8,127,0.008755200356245042
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,24,24,128,1,float16,float16,255,0.018780800700187682
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,24,24,128,1,float16,fp8,255,0.009062399715185165
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,24,24,128,1,float16,float16,511,0.01866080015897751
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,24,24,128,1,float16,fp8,511,0.009279999881982803
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,24,24,128,1,float16,float16,1023,0.019700799882411957
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,24,24,128,1,float16,fp8,1023,0.010409600287675857
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,24,24,128,1,float16,float16,2047,0.022171199321746826
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,24,24,128,1,float16,fp8,2047,0.010755199939012527
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,24,24,128,1,float16,float16,4095,0.030964800715446474
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,24,24,128,1,float16,fp8,4095,0.012544000148773193
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,24,24,128,1,float16,float16,1,0.0247856006026268
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,24,24,128,1,float16,fp8,1,0.012587200105190276
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,24,24,128,1,float16,float16,3,0.023907199501991272
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,24,24,128,1,float16,fp8,3,0.013471999764442444
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,24,24,128,1,float16,float16,7,0.024748800694942473
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,24,24,128,1,float16,fp8,7,0.01252799928188324
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,24,24,128,1,float16,float16,15,0.024742400646209715
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,24,24,128,1,float16,fp8,15,0.012537600100040435
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,24,24,128,1,float16,float16,31,0.02362399995326996
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,24,24,128,1,float16,fp8,31,0.013672000169754029
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,24,24,128,1,float16,float16,63,0.024907200038433074
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,24,24,128,1,float16,fp8,63,0.012561599910259246
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,24,24,128,1,float16,float16,127,0.02471359968185425
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,24,24,128,1,float16,fp8,127,0.013575999438762665
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,24,24,128,1,float16,float16,255,0.03054080009460449
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,24,24,128,1,float16,fp8,255,0.012646399438381195
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,24,24,128,1,float16,float16,511,0.05053439736366272
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,24,24,128,1,float16,fp8,511,0.020582400262355804
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,24,24,128,1,float16,float16,1023,0.07910720109939576
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,24,24,128,1,float16,fp8,1023,0.03857440054416657
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,24,24,128,1,float16,float16,2047,0.13533439636230468
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,24,24,128,1,float16,fp8,2047,0.0671231985092163
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,24,24,128,1,float16,float16,1,0.01656160056591034
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,24,24,128,1,float16,fp8,1,0.008367999643087386
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,24,24,128,1,float16,float16,4095,0.24643518924713134
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,24,24,128,1,float16,fp8,4095,0.12641760110855102
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,24,24,128,1,float16,float16,3,0.016676799952983858
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,24,24,128,1,float16,fp8,3,0.008339200168848038
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,24,24,128,1,float16,float16,7,0.016579200327396394
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,24,24,128,1,float16,fp8,7,0.007631999999284744
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,24,24,128,1,float16,float16,15,0.016756799817085267
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,24,24,128,1,float16,fp8,15,0.008345600217580795
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,24,24,128,1,float16,float16,31,0.016622400283813475
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,24,24,128,1,float16,fp8,31,0.007416000217199325
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,24,24,128,1,float16,float16,63,0.017115199565887453
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,24,24,128,1,float16,fp8,63,0.008347199857234954
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,24,24,128,1,float16,float16,127,0.01751520037651062
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,24,24,128,1,float16,fp8,127,0.008399999886751174
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,24,24,128,1,float16,float16,2047,0.031007999181747438
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,24,24,128,1,float16,float16,255,0.016884799301624297
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,24,24,128,1,float16,fp8,255,0.008395200222730636
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,24,24,128,1,float16,float16,511,0.018585599958896637
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,24,24,128,1,float16,fp8,511,0.008532799780368805
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,24,24,128,1,float16,float16,1023,0.020759999752044678
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,24,24,128,1,float16,fp8,1023,0.010366400331258773
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,24,24,128,1,float16,fp8,2047,0.012558400630950928
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,24,24,128,1,float16,float16,4095,0.050577598810195926
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,24,24,128,1,float16,fp8,4095,0.018726399540901183
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,24,24,128,1,float16,float16,1,0.045342400670051575
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,24,24,128,1,float16,fp8,1,0.031091201305389404
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,24,24,128,1,float16,float16,3,0.04540959894657135
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,24,24,128,1,float16,fp8,3,0.030931198596954347
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,24,24,128,1,float16,float16,7,0.046076801419258115
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,24,24,128,1,float16,fp8,7,0.030910399556159974
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,24,24,128,1,float16,float16,15,0.04531840085983276
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,24,24,128,1,float16,fp8,15,0.030883198976516722
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,24,24,128,1,float16,float16,31,0.045419201254844666
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,24,24,128,1,float16,fp8,31,0.031115201115608216
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,24,24,128,1,float16,float16,63,0.05150560140609741
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,24,24,128,1,float16,fp8,63,0.03099839985370636
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,24,24,128,1,float16,float16,127,0.0544655978679657
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,24,24,128,1,float16,fp8,127,0.03167040050029755
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,24,24,128,1,float16,float16,255,0.07843040227890015
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,24,24,128,1,float16,fp8,255,0.038736000657081604
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,24,24,128,1,float16,float16,511,0.132915198802948
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,24,24,128,1,float16,float16,1,0.07931680083274842
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,24,24,128,1,float16,fp8,511,0.06588799953460693
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,24,24,128,1,float16,float16,3,0.07883359789848328
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,24,24,128,1,float16,float16,2047,0.45597119331359864
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,24,24,128,1,float16,float16,1023,0.24001600742340087
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,24,24,128,1,float16,fp8,1023,0.12233599424362182
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,24,24,128,1,float16,fp8,1,0.055632001161575316
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,24,24,128,1,float16,fp8,3,0.05558879971504212
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,24,24,128,1,float16,fp8,2047,0.23823199272155762
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,24,24,128,1,float16,float16,7,0.0782047986984253
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,24,24,128,1,float16,fp8,7,0.05548160076141358
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,24,24,128,1,float16,float16,15,0.0803391993045807
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,24,24,128,1,float16,fp8,15,0.05553920269012451
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,24,24,128,1,float16,fp8,127,0.05632479786872864
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,24,24,128,1,float16,float16,31,0.08637440204620361
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,24,24,128,1,float16,fp8,31,0.055511999130249026
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,24,24,128,1,float16,fp8,255,0.06844639778137207
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,24,24,128,1,float16,float16,63,0.08714879751205444
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,24,24,128,1,float16,fp8,63,0.05659840106964111
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,24,24,128,1,float16,float16,127,0.0905023992061615
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,24,24,128,1,float16,float16,3,0.14162559509277345
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,24,24,128,1,float16,float16,255,0.1399775981903076
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,24,24,128,1,float16,float16,511,0.24756319522857667
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,24,24,128,1,float16,float16,1,0.14142719507217408
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,24,24,128,1,float16,fp8,511,0.12197279930114746
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,24,24,128,1,float16,fp8,15,0.10339200496673584
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,24,24,128,1,float16,fp8,1,0.10298880338668823
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,24,24,128,1,float16,fp8,3,0.10318880081176758
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,24,24,128,1,float16,float16,63,0.15094239711761476
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,24,24,128,1,float16,float16,7,0.14379520416259767
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,24,24,128,1,float16,fp8,7,0.1029520034790039
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,24,24,128,1,float16,float16,15,0.15004320144653321
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,24,24,128,1,float16,float16,31,0.15195679664611816
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,24,24,128,1,float16,fp8,31,0.10469119548797608
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,24,24,128,1,float16,fp8,63,0.10458559989929199
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,24,24,128,1,float16,float16,3,0.0186256006360054
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,24,24,128,1,float16,float16,127,0.15945279598236084
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,24,24,128,1,float16,fp8,3,0.008343999832868576
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,24,24,128,1,float16,fp8,127,0.10478880405426025
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,24,24,128,1,float16,float16,1,0.018628799915313722
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,24,24,128,1,float16,float16,255,0.2568687915802002
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,24,24,128,1,float16,fp8,255,0.12864480018615723
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,24,24,128,1,float16,fp8,1,0.00835999995470047
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,24,24,128,1,float16,float16,7,0.01871519982814789
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,24,24,128,1,float16,fp8,7,0.008395200222730636
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,24,24,128,1,float16,float16,15,0.01855199933052063
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,24,24,128,1,float16,fp8,15,0.008540800213813782
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,24,24,128,1,float16,float16,31,0.01867839992046356
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,24,24,128,1,float16,fp8,31,0.00852160006761551
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,24,24,128,1,float16,float16,63,0.018595199286937713
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,24,24,128,1,float16,fp8,63,0.008457600325345992
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,24,24,128,1,float16,fp8,511,0.010463999956846238
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,24,24,128,1,float16,float16,127,0.018680000305175783
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,24,24,128,1,float16,fp8,127,0.008454400300979614
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,24,24,128,1,float16,float16,255,0.018769599497318268
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,24,24,128,1,float16,fp8,255,0.00846560001373291
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,24,24,128,1,float16,float16,511,0.020694400370121
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,24,24,128,1,float16,float16,1023,0.0318015992641449
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,24,24,128,1,float16,fp8,1023,0.012454400211572647
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,24,24,128,1,float16,float16,2047,0.0537168025970459
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,24,24,128,1,float16,fp8,1,0.19889919757843016
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,24,24,128,1,float16,fp8,2047,0.020739200711250304
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,24,24,128,1,float16,float16,4095,0.08374559879302979
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,24,24,128,1,float16,fp8,4095,0.04169439971446991
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,24,24,128,1,float16,float16,7,0.2762671947479248
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,24,24,128,1,float16,float16,1,0.2648864030838013
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,24,24,128,1,float16,float16,3,0.2743439912796021
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,24,24,128,1,float16,fp8,3,0.19886879920959472
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,24,24,128,1,float16,fp8,7,0.19906879663467408
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,24,24,128,1,float16,float16,15,0.27714240550994873
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,24,24,128,1,float16,fp8,15,0.19904160499572754
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,24,24,128,1,float16,fp8,63,0.2003119945526123
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,24,24,128,1,float16,float16,31,0.27703680992126467
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,24,24,128,1,float16,fp8,31,0.19909119606018066
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,24,24,128,1,float16,float16,63,0.27873280048370364
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,24,24,128,1,float16,float16,127,0.29329280853271483
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,24,24,128,1,float16,fp8,127,0.19899519681930541
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,24,24,128,1,float16,float16,1,0.5303567886352539
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,24,24,128,1,float16,fp8,1,0.3909600019454956
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,24,24,128,1,float16,fp8,3,0.3912480115890503
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,24,24,128,1,float16,float16,3,0.5306416034698487
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,24,24,128,1,float16,float16,7,0.5315087795257568
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,24,24,128,1,float16,fp8,7,0.39130239486694335
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,24,24,128,1,float16,float16,15,0.5313375949859619
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,24,24,128,1,float16,float16,1,0.02062560021877289
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,24,24,128,1,float16,fp8,15,0.3903743982315063
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,24,24,128,1,float16,float16,31,0.5321728229522705
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,24,24,128,1,float16,fp8,31,0.3896095991134644
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,24,24,128,1,float16,fp8,1,0.010407999902963639
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,24,24,128,1,float16,float16,3,0.0206496000289917
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,24,24,128,1,float16,float16,63,0.5322959899902344
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,24,24,128,1,float16,fp8,3,0.010496000200510025
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,24,24,128,1,float16,fp8,63,0.3904560089111328
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,24,24,128,1,float16,float16,7,0.020633600652217865
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,24,24,128,1,float16,fp8,7,0.010427200049161912
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,24,24,128,1,float16,float16,15,0.020652799308300017
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,24,24,128,1,float16,fp8,15,0.010390400141477584
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,24,24,128,1,float16,float16,31,0.01980479955673218
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,24,24,128,1,float16,fp8,31,0.01043040007352829
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,24,24,128,1,float16,float16,63,0.020660799741744996
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,24,24,128,1,float16,fp8,63,0.010518400371074677
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,24,24,128,1,float16,float16,127,0.020755200088024138
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,24,24,128,1,float16,fp8,127,0.01043360009789467
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,24,24,128,1,float16,float16,255,0.020795199275016784
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,24,24,128,1,float16,fp8,255,0.010411199927330018
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,24,24,128,1,float16,fp8,511,0.012444800138473511
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,24,24,128,1,float16,float16,511,0.029528000950813295
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,24,24,128,1,float16,fp8,4095,0.06619679927825928
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,24,24,128,1,float16,float16,1023,0.050134402513504026
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,24,24,128,1,float16,fp8,1023,0.01867839992046356
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,24,24,128,1,float16,float16,2047,0.0776639997959137
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,24,24,128,1,float16,fp8,2047,0.03777439892292023
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,24,24,128,1,float16,float16,4095,0.13338559865951538
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,16,128,1,float16,float16,1,0.02678399980068207
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,16,128,1,float16,fp8,1,0.014716799557209014
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,16,128,1,float16,float16,3,0.02675360143184662
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,16,128,1,float16,fp8,31,0.01470080018043518
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,16,128,1,float16,float16,63,0.026902401447296144
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,16,128,1,float16,fp8,3,0.014606399834156037
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,16,128,1,float16,float16,7,0.026899200677871705
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,16,128,1,float16,fp8,7,0.014574399590492249
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,16,128,1,float16,float16,15,0.026743999123573302
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,16,128,1,float16,fp8,15,0.014606399834156037
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,16,128,1,float16,float16,31,0.026793599128723145
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,16,128,1,float16,fp8,63,0.014766399562358857
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,16,128,1,float16,float16,127,0.026782399415969847
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,16,128,1,float16,fp8,127,0.014542399346828461
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,16,128,1,float16,float16,255,0.04107039868831634
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,16,128,1,float16,fp8,255,0.014608000218868256
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,16,128,1,float16,float16,511,0.05963039994239807
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,16,128,1,float16,fp8,511,0.02869440019130707
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,16,128,1,float16,float16,1023,0.09627360105514526
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,16,128,1,float16,fp8,1023,0.0478879988193512
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,16,128,1,float16,float16,2047,0.16876640319824218
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,16,128,1,float16,fp8,2047,0.08582080006599427
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,16,128,1,float16,float16,1,0.01871519982814789
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,16,128,1,float16,float16,4095,0.3131983995437622
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,16,128,1,float16,fp8,4095,0.1650720000267029
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,16,128,1,float16,float16,3,0.01857919991016388
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,16,128,1,float16,fp8,8191,0.3236959934234619
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,16,128,1,float16,float16,15,0.01865279972553253
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,16,128,1,float16,float16,8191,0.6040863990783691
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,16,128,1,float16,float16,7,0.018598400056362152
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,16,128,1,float16,float16,31,0.01873439997434616
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,16,128,1,float16,float16,63,0.018587200343608855
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,16,128,1,float16,float16,127,0.01863359957933426
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,16,128,1,float16,fp8,1,0.008483199775218964
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,16,128,1,float16,float16,255,0.01727519929409027
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,16,128,1,float16,fp8,3,0.008374399691820144
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,16,128,1,float16,fp8,255,0.008388800173997879
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,16,128,1,float16,float16,511,0.018622399866580965
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,16,128,1,float16,fp8,511,0.00854559987783432
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,16,128,1,float16,float16,1023,0.018830400705337525
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,16,128,1,float16,fp8,7,0.008664000034332275
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,16,128,1,float16,fp8,1023,0.008668799698352814
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,16,128,1,float16,float16,2047,0.01871200054883957
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,16,128,1,float16,fp8,2047,0.010382399708032609
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,16,128,1,float16,fp8,15,0.008659200370311737
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,16,128,1,float16,float16,4095,0.020763200521469117
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,16,128,1,float16,fp8,4095,0.010478399693965912
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,16,128,1,float16,fp8,63,0.008476799726486206
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,16,128,1,float16,fp8,31,0.00854720026254654
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,16,128,1,float16,float16,8191,0.022700800001621245
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,16,128,1,float16,fp8,127,0.008670400083065032
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,16,128,1,float16,fp8,8191,0.012435200065374375
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,16,128,1,float16,float16,1,0.016651199758052827
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,16,128,1,float16,fp8,1,0.008483199775218964
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,16,128,1,float16,float16,3,0.016607999801635742
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,16,128,1,float16,fp8,3,0.008302400261163712
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,16,128,1,float16,float16,7,0.016631999611854555
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,16,128,1,float16,fp8,7,0.008367999643087386
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,16,128,1,float16,float16,15,0.016628800332546233
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,16,128,1,float16,fp8,15,0.008345600217580795
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,16,128,1,float16,float16,31,0.016574400663375854
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,16,128,1,float16,fp8,31,0.008396799862384795
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,16,128,1,float16,float16,63,0.016628800332546233
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,16,128,1,float16,fp8,63,0.008513599634170532
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,16,128,1,float16,float16,127,0.01757279932498932
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,16,128,1,float16,fp8,127,0.008428800106048583
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,16,128,1,float16,float16,255,0.016539199650287627
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,16,128,1,float16,fp8,255,0.008366400003433227
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,16,128,1,float16,float16,511,0.01666560024023056
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,16,128,1,float16,fp8,511,0.008324799686670303
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,16,128,1,float16,float16,1023,0.017385600507259368
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,16,128,1,float16,fp8,1023,0.008414400368928909
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,16,128,1,float16,float16,2047,0.01889760047197342
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,16,128,1,float16,fp8,2047,0.010249599814414978
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,16,128,1,float16,float16,4095,0.02081120014190674
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,16,128,1,float16,fp8,3,0.010503999888896942
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,16,128,1,float16,fp8,4095,0.010499200224876404
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,16,128,1,float16,float16,8191,0.03960959911346436
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,16,128,1,float16,fp8,8191,0.013121600449085235
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,16,128,1,float16,float16,1,0.020745599269866945
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,16,128,1,float16,fp8,1,0.010620799660682679
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,16,128,1,float16,float16,3,0.020644800364971162
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,16,128,1,float16,float16,7,0.020654399693012238
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,16,128,1,float16,fp8,7,0.010524799674749374
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,16,128,1,float16,float16,15,0.02147040069103241
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,16,128,1,float16,fp8,15,0.010576000064611435
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,16,128,1,float16,float16,31,0.020713600516319274
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,16,128,1,float16,fp8,31,0.010427200049161912
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,16,128,1,float16,float16,511,0.0404911994934082
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,16,128,1,float16,float16,63,0.021590399742126464
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,16,128,1,float16,fp8,511,0.014511999487876893
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,16,128,1,float16,fp8,63,0.010492800176143647
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,16,128,1,float16,float16,127,0.021937599778175353
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,16,128,1,float16,fp8,127,0.010579200088977813
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,16,128,1,float16,fp8,2047,0.047393599152565004
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,16,128,1,float16,float16,255,0.02205760031938553
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,16,128,1,float16,float16,4095,0.17081600427627563
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,16,128,1,float16,fp8,255,0.01053600013256073
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,16,128,1,float16,float16,1023,0.059648001194000246
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,16,128,1,float16,fp8,1023,0.02900800108909607
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,16,128,1,float16,float16,2047,0.0970304012298584
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,16,128,1,float16,fp8,4095,0.08570399880409241
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,16,128,1,float16,float16,1,0.018564799427986146
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,16,128,1,float16,fp8,1,0.008355200290679932
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,16,128,1,float16,float16,3,0.018555200099945067
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,16,128,1,float16,float16,8191,0.3184767961502075
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,16,128,1,float16,float16,31,0.01857440024614334
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,16,128,1,float16,fp8,8191,0.16456960439682006
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,16,128,1,float16,fp8,3,0.00851840004324913
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,16,128,1,float16,float16,7,0.01693120002746582
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,16,128,1,float16,fp8,7,0.008399999886751174
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,16,128,1,float16,float16,15,0.0176704004406929
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,16,128,1,float16,fp8,15,0.008457600325345992
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,16,128,1,float16,fp8,31,0.008561599999666214
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,16,128,1,float16,float16,63,0.01867839992046356
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,16,128,1,float16,fp8,63,0.008401600271463394
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,16,128,1,float16,float16,127,0.016780799627304076
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,16,128,1,float16,fp8,127,0.008428800106048583
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,16,128,1,float16,float16,255,0.01905920058488846
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,16,128,1,float16,fp8,255,0.008367999643087386
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,16,128,1,float16,float16,511,0.018612800538539885
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,16,128,1,float16,fp8,511,0.008484800159931184
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,16,128,1,float16,float16,1023,0.018691200017929076
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,16,128,1,float16,fp8,1023,0.008531200140714646
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,16,128,1,float16,float16,2047,0.020721599459648132
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,16,128,1,float16,fp8,2047,0.01053600013256073
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,16,128,1,float16,float16,4095,0.04120959937572479
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,16,128,1,float16,fp8,4095,0.01263359934091568
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,16,128,1,float16,fp8,3,0.02290239930152893
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,16,128,1,float16,float16,8191,0.05945600271224975
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,16,128,1,float16,fp8,8191,0.0288239985704422
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,16,128,1,float16,float16,1,0.036643201112747194
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,16,128,1,float16,fp8,1,0.022915199398994446
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,16,128,1,float16,float16,3,0.03584319949150085
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,16,128,1,float16,float16,7,0.03655360043048859
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,16,128,1,float16,fp8,7,0.022924800217151643
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,16,128,1,float16,float16,15,0.03648479878902435
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,16,128,1,float16,fp8,15,0.022873599827289582
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,16,128,1,float16,float16,31,0.037011200189590455
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,16,128,1,float16,fp8,31,0.022815999388694764
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,16,128,1,float16,float16,63,0.03569439947605133
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,16,128,1,float16,fp8,63,0.022732800245285033
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,16,128,1,float16,float16,511,0.09627040028572083
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,16,128,1,float16,float16,127,0.043252798914909366
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,16,128,1,float16,fp8,127,0.022779199481010436
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,16,128,1,float16,float16,255,0.059952002763748166
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,16,128,1,float16,fp8,255,0.028910401463508605
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,16,128,1,float16,fp8,511,0.047260800004005434
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,16,128,1,float16,float16,1023,0.1684720039367676
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,16,128,1,float16,fp8,1023,0.0847823977470398
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,16,128,1,float16,fp8,2047,0.16178879737854004
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,16,128,1,float16,float16,2047,0.3122495889663696
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,16,128,1,float16,float16,1,0.055585598945617674
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,16,128,1,float16,fp8,1,0.0391728013753891
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,16,128,1,float16,float16,3,0.056492799520492555
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,16,128,1,float16,fp8,3,0.039192000031471254
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,16,128,1,float16,float16,4095,0.6014272212982178
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,16,128,1,float16,fp8,4095,0.3173167943954468
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,16,128,1,float16,float16,15,0.055961602926254274
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,16,128,1,float16,float16,7,0.05621600151062012
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,16,128,1,float16,fp8,7,0.03917120099067688
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,16,128,1,float16,fp8,15,0.039156800508499144
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,16,128,1,float16,float16,31,0.058575999736785886
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,16,128,1,float16,fp8,31,0.03918879926204681
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,16,128,1,float16,float16,63,0.06180480122566223
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,16,128,1,float16,fp8,63,0.03919520080089569
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,16,128,1,float16,float16,127,0.06572960019111633
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,16,128,1,float16,fp8,127,0.0411215990781784
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,16,128,1,float16,float16,255,0.0985472023487091
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,16,128,1,float16,fp8,255,0.048728001117706296
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,16,128,1,float16,float16,511,0.17010879516601562
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,16,128,1,float16,fp8,511,0.0846015989780426
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,16,128,1,float16,float16,1,0.0993552029132843
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,16,128,1,float16,float16,1023,0.314136004447937
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,16,128,1,float16,fp8,1,0.07199360132217407
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,16,128,1,float16,fp8,1023,0.15970560312271118
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,16,128,1,float16,float16,3,0.10075839757919311
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,16,128,1,float16,fp8,3,0.07192959785461425
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,16,128,1,float16,float16,7,0.10089919567108155
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,16,128,1,float16,fp8,7,0.07195680141448975
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,16,128,1,float16,float16,15,0.10579839944839478
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,16,128,1,float16,fp8,15,0.07196320295333862
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,16,128,1,float16,float16,31,0.10795520544052124
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,16,128,1,float16,fp8,31,0.0722320020198822
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,16,128,1,float16,float16,63,0.10902080535888672
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,16,128,1,float16,fp8,63,0.07214239835739136
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,16,128,1,float16,float16,127,0.11297440528869629
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,16,128,1,float16,fp8,127,0.07184640169143677
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,16,128,1,float16,float16,255,0.17852319478988649
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,16,128,1,float16,fp8,255,0.08930720090866089
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,16,128,1,float16,float16,1,0.016991999745368958
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,16,128,1,float16,fp8,1,0.008312000334262848
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,16,128,1,float16,float16,511,0.32226879596710206
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,16,128,1,float16,fp8,511,0.15907520055770874
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,16,128,1,float16,float16,3,0.0166143998503685
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,16,128,1,float16,fp8,3,0.008336000144481659
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,16,128,1,float16,float16,7,0.016571199893951415
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,16,128,1,float16,fp8,7,0.00836160033941269
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,16,128,1,float16,float16,15,0.016603200137615202
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,16,128,1,float16,fp8,127,0.008313599973917007
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,16,128,1,float16,fp8,15,0.008327999711036682
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,16,128,1,float16,float16,31,0.01658560037612915
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,16,128,1,float16,fp8,31,0.008340799808502197
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,16,128,1,float16,float16,63,0.01672320067882538
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,16,128,1,float16,fp8,63,0.008343999832868576
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,16,128,1,float16,float16,127,0.016540800034999848
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,16,128,1,float16,float16,255,0.016715200245380403
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,16,128,1,float16,fp8,255,0.008379200100898742
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,16,128,1,float16,float16,511,0.018638400733470915
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,16,128,1,float16,fp8,511,0.00846560001373291
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,16,128,1,float16,float16,1023,0.020747199654579163
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,16,128,1,float16,fp8,1023,0.010585600137710571
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,16,128,1,float16,fp8,8191,0.04686079919338226
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,16,128,1,float16,float16,2047,0.03995360136032104
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,16,128,1,float16,fp8,2047,0.01244800016283989
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,16,128,1,float16,float16,4095,0.05901600122451782
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,16,128,1,float16,fp8,4095,0.027039998769760133
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,16,128,1,float16,float16,8191,0.09556000232696533
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,16,128,1,float16,float16,1,0.1807471990585327
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,16,128,1,float16,fp8,1,0.13544000387191774
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,16,128,1,float16,float16,3,0.18180639743804933
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,16,128,1,float16,fp8,3,0.13538880348205568
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,16,128,1,float16,fp8,31,0.13549439907073973
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,16,128,1,float16,float16,7,0.1901535987854004
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,16,128,1,float16,fp8,7,0.13542399406433106
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,16,128,1,float16,float16,15,0.19158879518508912
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,16,128,1,float16,fp8,15,0.13546719551086425
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,16,128,1,float16,float16,31,0.19102239608764648
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,16,128,1,float16,float16,63,0.19243359565734863
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,16,128,1,float16,fp8,63,0.1361840009689331
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,16,128,1,float16,float16,127,0.20209119319915772
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,16,128,1,float16,fp8,127,0.13559999465942382
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,16,128,1,float16,float16,255,0.33387041091918945
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,16,128,1,float16,float16,1,0.3484400033950806
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,16,128,1,float16,fp8,255,0.17011840343475343
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,16,128,1,float16,fp8,1,0.26243679523468016
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,16,128,1,float16,float16,3,0.3606528043746948
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,16,128,1,float16,fp8,3,0.2627264022827148
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,16,128,1,float16,float16,7,0.362006402015686
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,16,128,1,float16,fp8,7,0.2630863904953003
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,16,128,1,float16,float16,15,0.3612767934799194
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,16,128,1,float16,fp8,15,0.2626255989074707
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,16,128,1,float16,fp8,31,0.26276159286499023
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,16,128,1,float16,float16,31,0.3619823932647705
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,16,128,1,float16,float16,63,0.3626607894897461
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,16,128,1,float16,fp8,63,0.2641711950302124
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,16,128,1,float16,float16,1,0.01871200054883957
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,16,128,1,float16,fp8,3,0.00950239971280098
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,16,128,1,float16,fp8,1,0.008556800335645676
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,16,128,1,float16,fp8,127,0.26295199394226076
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,16,128,1,float16,float16,7,0.018612800538539885
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,16,128,1,float16,float16,127,0.3807487964630127
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,16,128,1,float16,float16,31,0.01860959976911545
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,16,128,1,float16,float16,3,0.018705600500106813
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,16,128,1,float16,fp8,63,0.008742400258779526
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,16,128,1,float16,fp8,7,0.008633600175380706
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,16,128,1,float16,float16,15,0.018624000251293182
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,16,128,1,float16,fp8,15,0.008534400165081025
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,16,128,1,float16,fp8,31,0.008504000306129456
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,16,128,1,float16,float16,127,0.018796800076961516
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,16,128,1,float16,fp8,127,0.00843999981880188
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,16,128,1,float16,float16,63,0.018700799345970152
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,16,128,1,float16,float16,255,0.018700799345970152
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,16,128,1,float16,fp8,255,0.008452799916267396
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,16,128,1,float16,float16,511,0.020766399800777435
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,16,128,1,float16,float16,4095,0.09637920260429382
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,16,128,1,float16,fp8,511,0.010436800122261048
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,16,128,1,float16,float16,1023,0.04116480052471161
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,16,128,1,float16,fp8,1023,0.01327040046453476
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,16,128,1,float16,float16,2047,0.05958719849586487
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,16,128,1,float16,fp8,2047,0.0287200003862381
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,16,128,1,float16,fp8,4095,0.04731679856777191
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,16,128,1,float16,float16,8191,0.16890560388565062
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,12,12,128,1,float16,float16,1,0.02473440021276474
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,12,12,128,1,float16,fp8,1,0.01348000019788742
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,12,12,128,1,float16,float16,15,0.02484479993581772
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,12,12,128,1,float16,float16,3,0.02468319982290268
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,12,12,128,1,float16,float16,31,0.024747200310230255
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,12,12,128,1,float16,fp8,31,0.012532800436019897
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,16,128,1,float16,fp8,8191,0.08830879926681519
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,12,12,128,1,float16,fp8,3,0.012604799866676331
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,12,12,128,1,float16,float16,7,0.0247856006026268
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,12,12,128,1,float16,fp8,7,0.013569599390029908
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,12,12,128,1,float16,fp8,15,0.012646399438381195
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,12,12,128,1,float16,float16,63,0.024828800559043886
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,12,12,128,1,float16,fp8,63,0.013556799292564392
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,12,12,128,1,float16,float16,127,0.024753600358963013
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,12,12,128,1,float16,fp8,127,0.012518399953842163
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,12,12,128,1,float16,float16,255,0.030766400694847106
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,12,12,128,1,float16,fp8,255,0.012777599692344665
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,12,12,128,1,float16,float16,511,0.050551998615264895
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,12,12,128,1,float16,fp8,511,0.020559999346733093
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,12,12,128,1,float16,float16,1023,0.07932159900665284
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,12,12,128,1,float16,fp8,4095,0.12603039741516114
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,12,12,128,1,float16,float16,4095,0.24612159729003907
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,12,12,128,1,float16,fp8,1023,0.03896960020065308
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,12,12,128,1,float16,float16,2047,0.13534879684448242
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,12,12,128,1,float16,fp8,2047,0.06754080057144166
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,12,12,128,1,float16,float16,8191,0.4691792011260986
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,12,12,128,1,float16,float16,1,0.018595199286937713
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,12,12,128,1,float16,float16,3,0.0181536003947258
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,12,12,128,1,float16,fp8,8191,0.24585280418395997
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,12,12,128,1,float16,float16,7,0.016755199432373045
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,12,12,128,1,float16,float16,15,0.016630400717258454
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,12,12,128,1,float16,float16,31,0.018611200153827667
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,12,12,128,1,float16,float16,63,0.016806399822235106
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,12,12,128,1,float16,float16,127,0.018568000197410582
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,12,12,128,1,float16,float16,255,0.016748799383640288
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,12,12,128,1,float16,fp8,1,0.008475200086832047
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,12,12,128,1,float16,float16,511,0.018651199340820313
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,12,12,128,1,float16,fp8,3,0.008619199693202972
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,12,12,128,1,float16,fp8,511,0.008444800227880477
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,12,12,128,1,float16,float16,1023,0.01674720048904419
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,12,12,128,1,float16,fp8,1023,0.008955200016498566
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,12,12,128,1,float16,fp8,7,0.008347199857234954
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,12,12,128,1,float16,fp8,15,0.008379200100898742
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,12,12,128,1,float16,float16,2047,0.018668800592422485
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,12,12,128,1,float16,fp8,2047,0.010726399719715118
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,12,12,128,1,float16,float16,4095,0.02078399956226349
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,12,12,128,1,float16,fp8,31,0.008395200222730636
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,12,12,128,1,float16,fp8,4095,0.010491199791431427
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,12,12,128,1,float16,float16,8191,0.02264000028371811
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,12,12,128,1,float16,fp8,63,0.008406399935483932
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,12,12,128,1,float16,fp8,8191,0.012636800110340119
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,12,12,128,1,float16,float16,1,0.018539200723171233
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,12,12,128,1,float16,fp8,1,0.00843999981880188
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,12,12,128,1,float16,fp8,7,0.00862400010228157
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,12,12,128,1,float16,fp8,127,0.008470399677753449
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,12,12,128,1,float16,float16,3,0.018700799345970152
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,12,12,128,1,float16,fp8,255,0.00836160033941269
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,12,12,128,1,float16,fp8,3,0.008566399663686752
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,12,12,128,1,float16,float16,63,0.017696000635623932
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,12,12,128,1,float16,float16,7,0.01857600063085556
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,12,12,128,1,float16,fp8,63,0.00843520015478134
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,12,12,128,1,float16,float16,15,0.018723200261592864
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,12,12,128,1,float16,fp8,15,0.008441600203514098
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,12,12,128,1,float16,float16,31,0.01664000004529953
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,12,12,128,1,float16,fp8,31,0.008535999804735184
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,12,12,128,1,float16,float16,127,0.01663520038127899
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,12,12,128,1,float16,fp8,127,0.008433599770069123
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,12,12,128,1,float16,fp8,1023,0.00852160006761551
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,12,12,128,1,float16,float16,255,0.017235200107097625
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,12,12,128,1,float16,fp8,2047,0.010596799850463866
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,12,12,128,1,float16,fp8,255,0.008448000252246856
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,12,12,128,1,float16,float16,4095,0.020734399557113647
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,12,12,128,1,float16,float16,511,0.018619200587272643
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,12,12,128,1,float16,fp8,511,0.008414400368928909
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,12,12,128,1,float16,float16,1023,0.01884479969739914
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,12,12,128,1,float16,float16,2047,0.020712000131607056
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,12,12,128,1,float16,fp8,4095,0.010542400181293488
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,12,12,128,1,float16,float16,8191,0.03125280141830444
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,12,12,128,1,float16,fp8,8191,0.013382400572299957
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,12,12,128,1,float16,float16,15,0.020712000131607056
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,12,12,128,1,float16,float16,1,0.020633600652217865
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,12,12,128,1,float16,fp8,1,0.010496000200510025
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,12,12,128,1,float16,float16,3,0.020627200603485107
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,12,12,128,1,float16,fp8,3,0.010401599854230881
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,12,12,128,1,float16,float16,7,0.02043039947748184
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,12,12,128,1,float16,fp8,7,0.010422399640083313
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,12,12,128,1,float16,fp8,15,0.010395199805498124
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,12,12,128,1,float16,float16,31,0.020628799498081208
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,12,12,128,1,float16,fp8,31,0.010480000078678131
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,12,12,128,1,float16,float16,63,0.020641599595546723
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,12,12,128,1,float16,fp8,63,0.010393600165843963
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,12,12,128,1,float16,float16,127,0.020683200657367708
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,12,12,128,1,float16,fp8,127,0.010380800068378448
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,12,12,128,1,float16,float16,255,0.019475199282169342
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,12,12,128,1,float16,fp8,255,0.010436800122261048
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,12,12,128,1,float16,float16,511,0.03076159954071045
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,12,12,128,1,float16,fp8,511,0.012452799826860428
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,12,12,128,1,float16,float16,1023,0.05137760043144226
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,12,12,128,1,float16,fp8,1023,0.01890240013599396
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,12,12,128,1,float16,float16,2047,0.07775999903678894
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,12,12,128,1,float16,fp8,2047,0.03853119909763336
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,12,12,128,1,float16,float16,4095,0.13272639513015747
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,12,12,128,1,float16,fp8,4095,0.06801599860191346
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,12,12,128,1,float16,float16,1,0.018803200125694274
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,12,12,128,1,float16,float16,8191,0.24210240840911865
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,12,12,128,1,float16,fp8,8191,0.12517600059509276
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,12,12,128,1,float16,fp8,1,0.009019199758768082
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,12,12,128,1,float16,fp8,15,0.00849440023303032
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,12,12,128,1,float16,float16,31,0.01857919991016388
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,12,12,128,1,float16,float16,3,0.01860480010509491
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,12,12,128,1,float16,fp8,3,0.008478400111198426
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,12,12,128,1,float16,float16,7,0.01865279972553253
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,12,12,128,1,float16,fp8,127,0.008873599767684936
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,12,12,128,1,float16,fp8,7,0.008475200086832047
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,12,12,128,1,float16,float16,15,0.018593600392341612
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,12,12,128,1,float16,fp8,31,0.008489599823951722
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,12,12,128,1,float16,float16,63,0.018566399812698364
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,12,12,128,1,float16,fp8,63,0.008430399745702744
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,12,12,128,1,float16,float16,127,0.018662400543689728
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,12,12,128,1,float16,float16,255,0.016735999286174773
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,12,12,128,1,float16,fp8,255,0.009363199770450591
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,12,12,128,1,float16,float16,511,0.018673600256443025
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,12,12,128,1,float16,fp8,511,0.008564800024032593
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,12,12,128,1,float16,float16,1023,0.02059520035982132
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,12,12,128,1,float16,fp8,1023,0.010489600151777268
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,12,12,128,1,float16,float16,2047,0.022327999770641326
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,12,12,128,1,float16,fp8,2047,0.010564800351858139
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,12,12,128,1,float16,float16,4095,0.030529600381851197
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,12,12,128,1,float16,fp8,4095,0.01273919939994812
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,12,12,128,1,float16,float16,8191,0.051363199949264526
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,12,12,128,1,float16,fp8,8191,0.018676799535751343
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,12,12,128,1,float16,float16,1,0.03097119927406311
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,12,12,128,1,float16,fp8,1,0.018827199935913086
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,12,12,128,1,float16,float16,3,0.03134239912033081
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,12,12,128,1,float16,fp8,3,0.019177600741386414
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,12,12,128,1,float16,float16,7,0.03203999996185303
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,12,12,128,1,float16,fp8,7,0.018705600500106813
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,12,12,128,1,float16,float16,15,0.03131200075149536
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,12,12,128,1,float16,fp8,15,0.018880000710487364
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,12,12,128,1,float16,float16,31,0.03222880065441132
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,12,12,128,1,float16,fp8,31,0.018849599361419677
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,12,12,128,1,float16,float16,63,0.031851199269294736
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,12,12,128,1,float16,fp8,63,0.019356800615787505
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,12,12,128,1,float16,float16,127,0.03708640038967133
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,12,12,128,1,float16,fp8,127,0.019079999625682832
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,12,12,128,1,float16,float16,255,0.050758397579193114
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,12,12,128,1,float16,fp8,255,0.021219199895858763
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,12,12,128,1,float16,float16,511,0.07810559868812561
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,12,12,128,1,float16,fp8,511,0.03889760076999664
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,12,12,128,1,float16,float16,1023,0.13221280574798583
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,12,12,128,1,float16,float16,1,0.046639999747276305
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,12,12,128,1,float16,fp8,1023,0.0665727972984314
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,12,12,128,1,float16,float16,2047,0.24018399715423583
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,12,12,128,1,float16,fp8,2047,0.12426559925079346
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,12,12,128,1,float16,fp8,1,0.03094879984855652
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,12,12,128,1,float16,float16,3,0.0470768004655838
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,12,12,128,1,float16,float16,4095,0.4566527843475342
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,12,12,128,1,float16,fp8,4095,0.24155519008636475
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,12,12,128,1,float16,fp8,3,0.031065601110458373
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,12,12,128,1,float16,float16,7,0.046884799003601076
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,12,12,128,1,float16,fp8,7,0.031001600623130798
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,12,12,128,1,float16,float16,15,0.0473471999168396
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,12,12,128,1,float16,fp8,15,0.031091201305389404
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,12,12,128,1,float16,float16,31,0.04676159918308258
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,12,12,128,1,float16,fp8,31,0.031006398797035217
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,12,12,128,1,float16,float16,63,0.05253919959068298
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,12,12,128,1,float16,fp8,63,0.030985599756240843
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,12,12,128,1,float16,float16,127,0.05491359829902649
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,12,12,128,1,float16,fp8,127,0.03272800147533417
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,12,12,128,1,float16,float16,255,0.07974079847335816
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,12,12,128,1,float16,fp8,255,0.03940320014953613
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,12,12,128,1,float16,float16,511,0.13335039615631103
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,12,12,128,1,float16,fp8,511,0.06575999855995178
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,12,12,128,1,float16,float16,1,0.08027999997138976
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,12,12,128,1,float16,float16,1023,0.24232640266418456
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,12,12,128,1,float16,fp8,1023,0.12328319549560547
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,12,12,128,1,float16,fp8,1,0.05559840202331543
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,12,12,128,1,float16,float16,3,0.07962080240249633
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,12,12,128,1,float16,fp8,3,0.05569919943809509
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,12,12,128,1,float16,float16,31,0.08835840225219727
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,12,12,128,1,float16,float16,7,0.08012319803237915
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,12,12,128,1,float16,fp8,7,0.05552160143852234
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,12,12,128,1,float16,float16,15,0.08001279830932617
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,12,12,128,1,float16,fp8,15,0.05577279925346375
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,12,12,128,1,float16,fp8,31,0.05554080009460449
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,12,12,128,1,float16,float16,63,0.0883791983127594
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,12,12,128,1,float16,fp8,63,0.05622400045394897
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,12,12,128,1,float16,float16,127,0.09311839938163757
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,12,12,128,1,float16,fp8,127,0.0570576012134552
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,12,12,128,1,float16,float16,255,0.14131040573120118
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,12,12,128,1,float16,fp8,255,0.06944000124931335
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,12,12,128,1,float16,float16,1,0.01658719927072525
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,12,12,128,1,float16,fp8,1,0.008423999696969987
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,12,12,128,1,float16,float16,511,0.2500864028930664
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,12,12,128,1,float16,fp8,15,0.008326400071382523
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,12,12,128,1,float16,fp8,511,0.12280479669570923
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,12,12,128,1,float16,fp8,31,0.00844319984316826
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,12,12,128,1,float16,float16,3,0.016739200055599212
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,12,12,128,1,float16,fp8,63,0.008324799686670303
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,12,12,128,1,float16,float16,127,0.016686399281024934
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,12,12,128,1,float16,fp8,3,0.008313599973917007
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,12,12,128,1,float16,float16,7,0.01661760061979294
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,12,12,128,1,float16,fp8,7,0.008372800052165985
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,12,12,128,1,float16,float16,511,0.018646399676799773
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,12,12,128,1,float16,float16,15,0.01675039976835251
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,12,12,128,1,float16,fp8,511,0.008366400003433227
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,12,12,128,1,float16,float16,31,0.01659200042486191
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,12,12,128,1,float16,float16,2047,0.030516800284385682
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,12,12,128,1,float16,float16,63,0.016646400094032288
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,12,12,128,1,float16,fp8,127,0.008376000076532364
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,12,12,128,1,float16,float16,255,0.017366400361061095
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,12,12,128,1,float16,fp8,255,0.008313599973917007
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,12,12,128,1,float16,float16,1023,0.020603199303150178
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,12,12,128,1,float16,fp8,1023,0.010417599976062775
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,12,12,128,1,float16,float16,1,0.14163680076599122
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,12,12,128,1,float16,fp8,2047,0.012617599964141846
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,12,12,128,1,float16,float16,4095,0.04960319995880127
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,12,12,128,1,float16,fp8,4095,0.018695999681949616
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,12,12,128,1,float16,float16,8191,0.07810080051422119
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,12,12,128,1,float16,fp8,8191,0.03664959967136383
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,12,12,128,1,float16,fp8,1,0.10339839458465576
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,12,12,128,1,float16,float16,3,0.14152159690856933
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,12,12,128,1,float16,fp8,3,0.10459840297698975
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,12,12,128,1,float16,float16,7,0.14493600130081177
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,12,12,128,1,float16,fp8,7,0.10371999740600586
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,12,12,128,1,float16,float16,15,0.1501423954963684
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,12,12,128,1,float16,fp8,15,0.10458240509033204
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,12,12,128,1,float16,float16,31,0.15184320211410524
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,12,12,128,1,float16,fp8,31,0.10469119548797608
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,12,12,128,1,float16,float16,63,0.15187679529190062
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,12,12,128,1,float16,fp8,63,0.10510400533676148
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,12,12,128,1,float16,float16,127,0.15884799957275392
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,12,12,128,1,float16,fp8,127,0.10502560138702392
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,12,12,128,1,float16,float16,255,0.25760478973388673
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,12,12,128,1,float16,fp8,255,0.1305184006690979
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,12,12,128,1,float16,float16,1,0.26446080207824707
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,12,12,128,1,float16,fp8,1,0.1991968035697937
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,12,12,128,1,float16,float16,3,0.2732784032821655
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,12,12,128,1,float16,fp8,3,0.20070240497589112
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,12,12,128,1,float16,float16,7,0.2767872095108032
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,12,12,128,1,float16,fp8,7,0.20088000297546388
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,12,12,128,1,float16,fp8,15,0.200927996635437
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,12,12,128,1,float16,float16,15,0.27796480655670164
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,12,12,128,1,float16,float16,31,0.27783679962158203
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,12,12,128,1,float16,fp8,31,0.2008687973022461
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,12,12,128,1,float16,float16,63,0.27849280834198
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,12,12,128,1,float16,fp8,63,0.20113439559936525
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,12,12,128,1,float16,float16,1,0.018603199720382692
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,12,12,128,1,float16,fp8,1,0.00841120034456253
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,12,12,128,1,float16,float16,127,0.29283039569854735
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,12,12,128,1,float16,float16,3,0.018680000305175783
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,12,12,128,1,float16,fp8,127,0.20144801139831542
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,12,12,128,1,float16,fp8,3,0.00854559987783432
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,12,12,128,1,float16,fp8,7,0.008416000008583068
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,12,12,128,1,float16,float16,7,0.01871519982814789
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,12,12,128,1,float16,float16,15,0.018675200641155243
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,12,12,128,1,float16,fp8,15,0.008656000345945358
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,12,12,128,1,float16,float16,31,0.01860480010509491
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,12,12,128,1,float16,fp8,31,0.0084927998483181
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,12,12,128,1,float16,float16,63,0.018668800592422485
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,12,12,128,1,float16,fp8,63,0.00833439975976944
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,12,12,128,1,float16,float16,127,0.01860480010509491
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,12,12,128,1,float16,fp8,127,0.008436799794435502
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,12,12,128,1,float16,float16,255,0.018673600256443025
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,12,12,128,1,float16,fp8,255,0.008644799888134002
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,12,12,128,1,float16,float16,511,0.020793600380420683
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,12,12,128,1,float16,fp8,511,0.01048479974269867
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,12,12,128,1,float16,float16,1023,0.03118079900741577
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,12,12,128,1,float16,fp8,1023,0.012638400495052337
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,12,12,128,1,float16,float16,2047,0.053478401899337766
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,12,12,128,1,float16,fp8,2047,0.02030239999294281
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,12,12,128,1,float16,float16,4095,0.08423839807510376
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,12,12,128,1,float16,fp8,4095,0.04151679873466492
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,12,12,128,1,float16,float16,8191,0.1444159984588623
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,8,128,1,float16,float16,1,0.020827199518680572
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,12,12,128,1,float16,fp8,8191,0.07431679964065552
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,8,128,1,float16,fp8,7,0.010547199845314026
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,8,128,1,float16,fp8,1,0.01048159971833229
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,8,128,1,float16,fp8,31,0.010515200346708298
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,8,128,1,float16,float16,3,0.020747199654579163
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,8,128,1,float16,fp8,3,0.010556799918413162
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,8,128,1,float16,float16,7,0.020656000077724456
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,8,128,1,float16,float16,15,0.02077919989824295
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,8,128,1,float16,float16,31,0.020854400098323823
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,8,128,1,float16,fp8,15,0.010619200021028518
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,8,128,1,float16,float16,63,0.020713600516319274
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,8,128,1,float16,fp8,63,0.010499200224876404
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,8,128,1,float16,float16,127,0.020732800662517547
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,8,128,1,float16,fp8,127,0.010969600081443787
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,8,128,1,float16,float16,255,0.02083200067281723
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,8,128,1,float16,fp8,255,0.01048479974269867
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,8,128,1,float16,float16,511,0.041196799278259276
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,8,128,1,float16,fp8,511,0.014619199931621552
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,8,128,1,float16,float16,4095,0.1716464042663574
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,8,128,1,float16,float16,1023,0.060153597593307497
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,8,128,1,float16,fp8,1023,0.028995200991630554
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,8,128,1,float16,float16,2047,0.09628480076789855
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,8,128,1,float16,fp8,2047,0.04767360091209412
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,8,128,1,float16,fp8,4095,0.08626880049705506
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,8,128,1,float16,float16,1,0.016945600509643555
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,8,128,1,float16,float16,8191,0.3201312065124512
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,8,128,1,float16,fp8,8191,0.16462719440460205
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,8,128,1,float16,float16,3,0.01860000044107437
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,8,128,1,float16,float16,7,0.016740800440311433
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,8,128,1,float16,fp8,16383,0.3238080024719238
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,8,128,1,float16,float16,16383,0.6240320205688477
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,8,128,1,float16,float16,15,0.01666560024023056
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,8,128,1,float16,float16,31,0.01664479970932007
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,8,128,1,float16,float16,63,0.01858399957418442
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,8,128,1,float16,float16,127,0.016620799899101257
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,8,128,1,float16,float16,255,0.01671999990940094
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,8,128,1,float16,fp8,1,0.00843999981880188
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,8,128,1,float16,float16,511,0.01658879965543747
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,8,128,1,float16,fp8,3,0.008558399975299835
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,8,128,1,float16,fp8,511,0.008491200208663941
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,8,128,1,float16,float16,1023,0.01858240067958832
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,8,128,1,float16,fp8,1023,0.008372800052165985
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,8,128,1,float16,fp8,7,0.008451200276613235
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,8,128,1,float16,float16,2047,0.019441600143909454
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,8,128,1,float16,fp8,15,0.00835679993033409
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,8,128,1,float16,fp8,2047,0.010467199981212616
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,8,128,1,float16,float16,4095,0.020755200088024138
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,8,128,1,float16,fp8,4095,0.010406400263309478
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,8,128,1,float16,float16,8191,0.022676800191402436
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,8,128,1,float16,fp8,31,0.008460800349712371
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,8,128,1,float16,fp8,8191,0.012459199875593185
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,8,128,1,float16,float16,16383,0.024799999594688416
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,8,128,1,float16,fp8,63,0.008556800335645676
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,8,128,1,float16,fp8,16383,0.012668800354003907
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,8,128,1,float16,float16,1,0.018598400056362152
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,8,128,1,float16,fp8,127,0.008401600271463394
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,8,128,1,float16,fp8,1,0.00859839990735054
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,8,128,1,float16,fp8,255,0.00846880003809929
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,8,128,1,float16,float16,3,0.01733600050210953
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,8,128,1,float16,fp8,3,0.008524800091981888
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,8,128,1,float16,float16,63,0.016582399606704712
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,8,128,1,float16,float16,7,0.016579200327396394
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,8,128,1,float16,fp8,7,0.008544000238180161
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,8,128,1,float16,float16,15,0.018585599958896637
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,8,128,1,float16,fp8,15,0.00846560001373291
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,8,128,1,float16,float16,31,0.01858240067958832
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,8,128,1,float16,fp8,31,0.008427199721336365
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,8,128,1,float16,fp8,63,0.008451200276613235
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,8,128,1,float16,float16,127,0.01736319959163666
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,8,128,1,float16,fp8,127,0.008448000252246856
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,8,128,1,float16,float16,255,0.018760000169277192
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,8,128,1,float16,fp8,255,0.008561599999666214
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,8,128,1,float16,float16,511,0.017849600315093993
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,8,128,1,float16,fp8,511,0.008455999940633774
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,8,128,1,float16,float16,1023,0.01857919991016388
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,8,128,1,float16,fp8,1023,0.008499199897050858
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,8,128,1,float16,float16,2047,0.01857759952545166
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,8,128,1,float16,fp8,2047,0.010475199669599533
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,8,128,1,float16,float16,4095,0.021668800711631776
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,8,128,1,float16,fp8,4095,0.010505600273609162
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,8,128,1,float16,float16,8191,0.02282879948616028
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,8,128,1,float16,fp8,8191,0.012544000148773193
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,8,128,1,float16,float16,16383,0.043219199776649474
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,8,128,1,float16,fp8,16383,0.014585599303245544
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,8,128,1,float16,float16,1,0.01866080015897751
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,8,128,1,float16,float16,15,0.01870400011539459
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,8,128,1,float16,fp8,1,0.00849440023303032
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,8,128,1,float16,float16,3,0.018619200587272643
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,8,128,1,float16,fp8,3,0.008550400286912918
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,8,128,1,float16,float16,7,0.018590399622917177
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,8,128,1,float16,fp8,7,0.008455999940633774
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,8,128,1,float16,fp8,15,0.008620800077915191
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,8,128,1,float16,float16,31,0.01860959976911545
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,8,128,1,float16,fp8,31,0.008532799780368805
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,8,128,1,float16,float16,63,0.01879359930753708
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,8,128,1,float16,fp8,63,0.00891520008444786
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,8,128,1,float16,float16,127,0.018606400489807128
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,8,128,1,float16,fp8,127,0.008478400111198426
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,8,128,1,float16,float16,255,0.018756799399852753
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,8,128,1,float16,fp8,255,0.008542399853467941
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,8,128,1,float16,float16,511,0.02072480022907257
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,8,128,1,float16,fp8,511,0.01051200032234192
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,8,128,1,float16,float16,1023,0.04100160002708435
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,8,128,1,float16,fp8,1023,0.013019199669361114
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,8,128,1,float16,float16,2047,0.05950080156326294
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,8,128,1,float16,fp8,2047,0.02871040105819702
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,8,128,1,float16,float16,4095,0.09645119905471802
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,8,128,1,float16,fp8,4095,0.04763199985027313
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,8,128,1,float16,float16,8191,0.16915199756622315
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,8,128,1,float16,fp8,8191,0.0885263979434967
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,8,128,1,float16,float16,1,0.01675039976835251
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,8,128,1,float16,fp8,1,0.008353599905967712
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,8,128,1,float16,float16,16383,0.3159231901168823
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,8,128,1,float16,float16,15,0.016590400040149687
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,8,128,1,float16,float16,3,0.01671999990940094
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,8,128,1,float16,fp8,16383,0.1713919997215271
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,8,128,1,float16,fp8,31,0.00833280012011528
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,8,128,1,float16,fp8,3,0.00833439975976944
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,8,128,1,float16,float16,63,0.016579200327396394
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,8,128,1,float16,fp8,63,0.008388800173997879
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,8,128,1,float16,float16,127,0.016672000288963318
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,8,128,1,float16,float16,7,0.016572800278663636
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,8,128,1,float16,fp8,7,0.008423999696969987
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,8,128,1,float16,fp8,15,0.008472000062465668
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,8,128,1,float16,float16,31,0.016553600132465363
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,8,128,1,float16,fp8,127,0.008417599648237229
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,8,128,1,float16,float16,255,0.016739200055599212
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,8,128,1,float16,fp8,255,0.008388800173997879
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,8,128,1,float16,float16,511,0.01658560037612915
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,8,128,1,float16,fp8,511,0.008339200168848038
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,8,128,1,float16,float16,1023,0.016812799870967864
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,8,128,1,float16,fp8,1023,0.008449599891901017
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,8,128,1,float16,float16,2047,0.018667200207710268
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,8,128,1,float16,float16,16383,0.05899519920349121
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,8,128,1,float16,fp8,2047,0.010168000310659408
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,8,128,1,float16,float16,4095,0.020715199410915375
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,8,128,1,float16,fp8,4095,0.010526400059461594
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,8,128,1,float16,float16,8191,0.039977601170539855
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,8,128,1,float16,fp8,8191,0.01284320056438446
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,8,128,1,float16,fp8,16383,0.02911199927330017
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,8,128,1,float16,float16,1,0.02678079903125763
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,8,128,1,float16,fp8,1,0.014683200418949128
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,8,128,1,float16,float16,3,0.02675360143184662
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,8,128,1,float16,fp8,3,0.01449279934167862
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,8,128,1,float16,float16,7,0.026982399821281432
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,8,128,1,float16,fp8,7,0.014611199498176575
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,8,128,1,float16,float16,127,0.026867198944091796
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,8,128,1,float16,float16,15,0.026966398954391478
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,8,128,1,float16,fp8,15,0.014575999975204468
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,8,128,1,float16,float16,31,0.026824000477790832
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,8,128,1,float16,fp8,31,0.014580799639225006
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,8,128,1,float16,float16,511,0.05955680012702942
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,8,128,1,float16,float16,63,0.026763200759887695
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,8,128,1,float16,fp8,63,0.014547200500965118
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,8,128,1,float16,float16,255,0.041116800904273984
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,8,128,1,float16,fp8,127,0.014710399508476257
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,8,128,1,float16,fp8,255,0.015641599893569946
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,8,128,1,float16,fp8,2047,0.08670079708099365
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,8,128,1,float16,fp8,511,0.028835201263427736
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,8,128,1,float16,float16,1023,0.09625759720802307
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,8,128,1,float16,fp8,1023,0.04741599857807159
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,8,128,1,float16,float16,2047,0.16956000328063964
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,8,128,1,float16,float16,4095,0.3130959987640381
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,8,128,1,float16,float16,1,0.03702079951763153
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,8,128,1,float16,fp8,4095,0.166484797000885
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,8,128,1,float16,fp8,1,0.02292959988117218
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,8,128,1,float16,float16,3,0.037145599722862244
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,8,128,1,float16,fp8,3,0.02285760045051575
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,8,128,1,float16,float16,7,0.03712320029735565
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,8,128,1,float16,fp8,7,0.022776000201702118
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,8,128,1,float16,fp8,8191,0.32527201175689696
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,8,128,1,float16,float16,8191,0.6108384132385254
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,8,128,1,float16,float16,15,0.037036800384521486
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,8,128,1,float16,fp8,15,0.022974400222301482
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,8,128,1,float16,float16,31,0.03715839982032776
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,8,128,1,float16,fp8,31,0.02276960015296936
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,8,128,1,float16,float16,63,0.03720960021018982
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,8,128,1,float16,fp8,63,0.02284960001707077
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,8,128,1,float16,float16,127,0.04477919936180115
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,8,128,1,float16,fp8,127,0.023318399488925935
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,8,128,1,float16,float16,255,0.061712002754211424
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,8,128,1,float16,fp8,255,0.02903519868850708
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,8,128,1,float16,float16,511,0.09740960001945495
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,8,128,1,float16,fp8,511,0.04735519886016846
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,8,128,1,float16,float16,1023,0.16890079975128175
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,8,128,1,float16,fp8,1023,0.08502560257911682
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,8,128,1,float16,float16,1,0.057633602619171144
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,8,128,1,float16,fp8,1,0.039139199256896975
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,8,128,1,float16,float16,2047,0.31424798965454104
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,8,128,1,float16,float16,3,0.05750240087509155
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,8,128,1,float16,fp8,2047,0.16072160005569458
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,8,128,1,float16,fp8,3,0.03919839859008789
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,8,128,1,float16,float16,7,0.057652801275253296
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,8,128,1,float16,fp8,7,0.0391072005033493
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,8,128,1,float16,float16,15,0.05753920078277588
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,8,128,1,float16,fp8,15,0.03911519944667816
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,8,128,1,float16,float16,31,0.05971199870109558
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,8,128,1,float16,fp8,31,0.039211198687553406
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,8,128,1,float16,float16,63,0.06350240111351013
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,8,128,1,float16,fp8,63,0.03924480080604553
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,8,128,1,float16,float16,127,0.06753119826316833
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,8,128,1,float16,fp8,127,0.04110240042209625
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,8,128,1,float16,float16,255,0.09925280213356018
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,8,128,1,float16,fp8,255,0.05116159915924072
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,8,128,1,float16,float16,511,0.17253439426422118
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,8,128,1,float16,fp8,511,0.08422080278396607
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,8,128,1,float16,float16,1,0.01746080070734024
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,8,128,1,float16,fp8,1,0.008500800281763077
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,8,128,1,float16,float16,15,0.018460799753665925
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,8,128,1,float16,float16,3,0.01860480010509491
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,8,128,1,float16,float16,31,0.017268800735473634
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,8,128,1,float16,fp8,31,0.008561599999666214
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,8,128,1,float16,fp8,3,0.008392000198364257
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,8,128,1,float16,float16,63,0.018380799889564516
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,8,128,1,float16,float16,1023,0.3150079965591431
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,8,128,1,float16,fp8,1023,0.15979520082473755
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,8,128,1,float16,float16,7,0.01860000044107437
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,8,128,1,float16,fp8,7,0.008535999804735184
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,8,128,1,float16,fp8,15,0.00838399976491928
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,8,128,1,float16,fp8,63,0.008380799740552902
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,8,128,1,float16,float16,127,0.017686399817466735
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,8,128,1,float16,fp8,127,0.008395200222730636
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,8,128,1,float16,float16,255,0.016728000342845918
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,8,128,1,float16,fp8,255,0.008457600325345992
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,8,128,1,float16,float16,511,0.01857600063085556
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,8,128,1,float16,fp8,511,0.008459199965000153
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,8,128,1,float16,float16,1023,0.019054399430751802
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,8,128,1,float16,fp8,1023,0.008555199950933456
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,8,128,1,float16,float16,2047,0.020615999400615693
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,8,128,1,float16,fp8,2047,0.01055999994277954
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,8,128,1,float16,float16,4095,0.04108479917049408
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,8,128,1,float16,float16,1,0.09951040148735046
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,8,128,1,float16,fp8,4095,0.012518399953842163
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,8,128,1,float16,float16,8191,0.05934399962425232
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,8,128,1,float16,fp8,8191,0.028799998760223388
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,8,128,1,float16,float16,16383,0.09567360281944275
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,8,128,1,float16,fp8,16383,0.04660159945487976
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,8,128,1,float16,fp8,1,0.07189120054244995
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,8,128,1,float16,float16,3,0.1004032015800476
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,8,128,1,float16,fp8,3,0.07185119986534119
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,8,128,1,float16,float16,7,0.0991487979888916
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,8,128,1,float16,float16,63,0.10883200168609619
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,8,128,1,float16,fp8,7,0.07194560170173644
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,8,128,1,float16,float16,15,0.10540640354156494
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,8,128,1,float16,fp8,15,0.07195519804954528
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,8,128,1,float16,float16,31,0.10810719728469849
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,8,128,1,float16,fp8,31,0.07190240025520325
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,8,128,1,float16,fp8,63,0.07268319725990295
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,8,128,1,float16,float16,127,0.11376960277557373
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,8,128,1,float16,fp8,127,0.07216320037841797
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,8,128,1,float16,fp8,1,0.13539999723434448
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,8,128,1,float16,float16,255,0.1792304039001465
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,8,128,1,float16,fp8,255,0.09187520146369935
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,8,128,1,float16,float16,1,0.181222403049469
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,8,128,1,float16,float16,511,0.32177760601043703
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,8,128,1,float16,fp8,511,0.1590224027633667
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,8,128,1,float16,fp8,15,0.13537919521331787
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,8,128,1,float16,float16,3,0.18269920349121094
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,8,128,1,float16,fp8,3,0.13547519445419312
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,8,128,1,float16,float16,7,0.18910720348358154
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,8,128,1,float16,fp8,7,0.13539520502090455
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,8,128,1,float16,float16,15,0.19089759588241578
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,8,128,1,float16,float16,31,0.1920575976371765
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,8,128,1,float16,fp8,31,0.13581759929656984
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,8,128,1,float16,float16,63,0.19265760183334352
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,8,128,1,float16,fp8,63,0.13734400272369385
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,8,128,1,float16,float16,127,0.20161120891571044
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,8,128,1,float16,fp8,127,0.1373296022415161
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,8,128,1,float16,float16,1,0.016599999368190767
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,8,128,1,float16,fp8,1,0.008313599973917007
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,8,128,1,float16,float16,255,0.33333919048309324
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,8,128,1,float16,fp8,255,0.17547680139541627
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,8,128,1,float16,float16,3,0.016771200299263
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,8,128,1,float16,fp8,3,0.00833439975976944
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,8,128,1,float16,float16,7,0.016790400445461272
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,8,128,1,float16,fp8,7,0.008337599784135818
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,8,128,1,float16,float16,15,0.017115199565887453
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,8,128,1,float16,fp8,15,0.008367999643087386
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,8,128,1,float16,float16,31,0.016575999557971954
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,8,128,1,float16,fp8,31,0.008396799862384795
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,8,128,1,float16,fp8,255,0.008350399881601333
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,8,128,1,float16,float16,63,0.016752000153064727
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,8,128,1,float16,fp8,63,0.008392000198364257
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,8,128,1,float16,float16,127,0.016652800142765045
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,8,128,1,float16,fp8,127,0.00830719992518425
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,8,128,1,float16,float16,255,0.017479999363422392
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,8,128,1,float16,float16,511,0.018731200695037843
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,8,128,1,float16,fp8,511,0.008390399813652038
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,8,128,1,float16,float16,1023,0.020606400072574617
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,8,128,1,float16,fp8,1023,0.010409600287675857
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,8,128,1,float16,float16,2047,0.03993760049343109
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,8,128,1,float16,fp8,2047,0.012630400061607362
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,8,128,1,float16,float16,4095,0.059513598680496216
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,8,128,1,float16,fp8,4095,0.027483201026916503
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,8,128,1,float16,float16,8191,0.09496319890022278
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,8,128,1,float16,fp8,8191,0.04594239890575409
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,4,128,1,float16,float16,1,0.018745599687099455
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,4,128,1,float16,fp8,3,0.008644799888134002
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,8,128,1,float16,float16,16383,0.16711679697036744
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,8,128,1,float16,fp8,16383,0.08501759767532349
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,4,128,1,float16,fp8,1,0.008420799672603608
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,4,128,1,float16,float16,3,0.01863359957933426
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,4,128,1,float16,fp8,31,0.008457600325345992
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,4,128,1,float16,float16,7,0.0187376007437706
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,4,128,1,float16,fp8,7,0.008369600027799606
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,4,128,1,float16,float16,15,0.018780800700187682
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,4,128,1,float16,fp8,15,0.00843520015478134
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,4,128,1,float16,float16,31,0.01865279972553253
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,4,128,1,float16,float16,63,0.018718400597572328
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,4,128,1,float16,fp8,63,0.008380799740552902
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,4,128,1,float16,float16,127,0.018654400110244752
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,4,128,1,float16,fp8,127,0.008478400111198426
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,4,128,1,float16,float16,255,0.0186271995306015
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,4,128,1,float16,fp8,255,0.008524800091981888
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,4,128,1,float16,float16,511,0.02067999988794327
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,4,128,1,float16,fp8,511,0.010550399869680404
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,4,128,1,float16,float16,1023,0.040982401371002196
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,4,128,1,float16,fp8,1023,0.012783999741077422
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,4,128,1,float16,float16,2047,0.059569597244262695
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,4,128,1,float16,fp8,2047,0.028782400488853454
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,4,128,1,float16,float16,4095,0.0964128017425537
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,4,128,1,float16,fp8,4095,0.046916800737380984
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,4,128,1,float16,fp8,16383,0.15907520055770874
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,4,128,1,float16,float16,8191,0.16865439414978028
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,4,128,1,float16,fp8,8191,0.08471519947052002
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,4,128,1,float16,float16,16383,0.31644320487976074
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,4,128,1,float16,float16,1,0.018769599497318268
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,4,128,1,float16,float16,3,0.016777600347995757
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,4,128,1,float16,float16,7,0.017340800166130065
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,4,128,1,float16,fp8,32767,0.3104160070419312
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,4,128,1,float16,float16,32767,0.6077424049377441
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,4,128,1,float16,float16,15,0.018587200343608855
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,4,128,1,float16,float16,31,0.016604800522327424
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,4,128,1,float16,float16,63,0.018670399487018586
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,4,128,1,float16,float16,127,0.016726399958133697
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,4,128,1,float16,float16,255,0.01677599996328354
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,4,128,1,float16,fp8,1,0.008416000008583068
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,4,128,1,float16,float16,511,0.018588800728321076
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,4,128,1,float16,fp8,511,0.008420799672603608
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,4,128,1,float16,fp8,3,0.00846560001373291
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,4,128,1,float16,float16,1023,0.018563200533390046
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,4,128,1,float16,fp8,7,0.008420799672603608
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,4,128,1,float16,float16,2047,0.018764799833297728
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,4,128,1,float16,fp8,1023,0.008433599770069123
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,4,128,1,float16,fp8,15,0.008459199965000153
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,4,128,1,float16,fp8,2047,0.008422400057315826
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,4,128,1,float16,float16,4095,0.018806399405002595
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,4,128,1,float16,fp8,8191,0.012537600100040435
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,4,128,1,float16,fp8,4095,0.01071999967098236
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,4,128,1,float16,fp8,31,0.008452799916267396
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,4,128,1,float16,float16,32767,0.02688480019569397
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,4,128,1,float16,float16,8191,0.02086080014705658
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,4,128,1,float16,fp8,63,0.008427199721336365
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,4,128,1,float16,float16,16383,0.02476799935102463
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,4,128,1,float16,fp8,127,0.008591999858617782
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,4,128,1,float16,fp8,3,0.008416000008583068
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,4,128,1,float16,fp8,16383,0.014534400403499603
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,4,128,1,float16,fp8,255,0.008406399935483932
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,4,128,1,float16,fp8,32767,0.015022400021553039
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,4,128,1,float16,float16,1,0.01879200041294098
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,4,128,1,float16,fp8,1,0.008369600027799606
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,4,128,1,float16,float16,3,0.018606400489807128
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,4,128,1,float16,float16,7,0.01868640035390854
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,4,128,1,float16,fp8,7,0.008455999940633774
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,4,128,1,float16,float16,15,0.017044800519943237
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,4,128,1,float16,fp8,15,0.00843999981880188
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,4,128,1,float16,float16,31,0.017027199268341064
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,4,128,1,float16,fp8,255,0.0084927998483181
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,4,128,1,float16,fp8,31,0.008441600203514098
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,4,128,1,float16,float16,63,0.016739200055599212
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,4,128,1,float16,fp8,63,0.008544000238180161
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,4,128,1,float16,float16,127,0.01868959963321686
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,4,128,1,float16,fp8,127,0.008643200248479843
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,4,128,1,float16,float16,255,0.017796799540519714
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,4,128,1,float16,float16,511,0.01857440024614334
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,4,128,1,float16,fp8,511,0.008457600325345992
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,4,128,1,float16,float16,1023,0.018593600392341612
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,4,128,1,float16,fp8,1023,0.008446399867534638
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,4,128,1,float16,float16,2047,0.020611199736595153
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,4,128,1,float16,fp8,2047,0.010416000336408614
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,4,128,1,float16,float16,4095,0.02080480009317398
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,4,128,1,float16,fp8,4095,0.010422399640083313
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,4,128,1,float16,float16,8191,0.02247840017080307
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,4,128,1,float16,fp8,8191,0.012451200187206269
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,4,128,1,float16,float16,16383,0.02281759977340698
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,4,128,1,float16,fp8,16383,0.012940800189971924
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,4,128,1,float16,float16,32767,0.04535360038280487
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,4,128,1,float16,fp8,32767,0.01653439998626709
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,4,128,1,float16,fp8,7,0.008337599784135818
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,4,128,1,float16,float16,1,0.016641600430011748
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,4,128,1,float16,fp8,1,0.00836160033941269
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,4,128,1,float16,float16,3,0.01666560024023056
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,4,128,1,float16,fp8,3,0.00846560001373291
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,4,128,1,float16,float16,7,0.016547200083732606
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,4,128,1,float16,float16,15,0.01656640022993088
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,4,128,1,float16,fp8,15,0.008372800052165985
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,4,128,1,float16,float16,31,0.01674720048904419
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,4,128,1,float16,fp8,31,0.008401600271463394
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,4,128,1,float16,float16,63,0.016872000694274903
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,4,128,1,float16,fp8,63,0.00835999995470047
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,4,128,1,float16,float16,127,0.016551999747753142
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,4,128,1,float16,fp8,127,0.008366400003433227
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,4,128,1,float16,float16,255,0.01666239947080612
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,4,128,1,float16,fp8,255,0.008393599838018417
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,4,128,1,float16,float16,511,0.017510400712490083
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,4,128,1,float16,fp8,511,0.00843840017914772
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,4,128,1,float16,float16,1023,0.02075839936733246
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,4,128,1,float16,fp8,1023,0.010377600044012069
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,4,128,1,float16,float16,2047,0.041119998693466185
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,4,128,1,float16,fp8,2047,0.012587200105190276
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,4,128,1,float16,float16,4095,0.05876799821853638
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,4,128,1,float16,fp8,4095,0.026862400770187377
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,4,128,1,float16,float16,8191,0.09447519779205323
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,4,128,1,float16,fp8,8191,0.04591200053691864
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,4,128,1,float16,float16,16383,0.16753920316696166
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,4,128,1,float16,fp8,16383,0.08298879861831665
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,4,128,1,float16,float16,32767,0.31096959114074707
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,4,128,1,float16,fp8,3,0.008363199979066848
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,4,128,1,float16,float16,1,0.01676799952983856
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,4,128,1,float16,fp8,1,0.008686400204896926
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,4,128,1,float16,float16,3,0.01687840074300766
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,4,128,1,float16,fp8,32767,0.15543359518051147
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,4,128,1,float16,float16,63,0.01759999990463257
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,4,128,1,float16,float16,7,0.018294399976730345
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,4,128,1,float16,fp8,7,0.008473599702119828
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,4,128,1,float16,float16,15,0.018595199286937713
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,4,128,1,float16,fp8,15,0.008590400218963623
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,4,128,1,float16,float16,31,0.018592000007629395
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,4,128,1,float16,fp8,31,0.008371199667453765
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,4,128,1,float16,fp8,63,0.008619199693202972
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,4,128,1,float16,float16,127,0.01860000044107437
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,4,128,1,float16,fp8,127,0.008534400165081025
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,4,128,1,float16,float16,255,0.01873279958963394
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,4,128,1,float16,fp8,255,0.008414400368928909
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,4,128,1,float16,float16,511,0.018585599958896637
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,4,128,1,float16,fp8,511,0.008478400111198426
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,4,128,1,float16,float16,1023,0.01770080029964447
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,4,128,1,float16,fp8,1023,0.008868800103664398
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,4,128,1,float16,float16,2047,0.01860159933567047
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,4,128,1,float16,fp8,2047,0.01043040007352829
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,4,128,1,float16,float16,4095,0.020851199328899384
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,4,128,1,float16,fp8,4095,0.011105599999427795
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,4,128,1,float16,float16,8191,0.022776000201702118
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,4,128,1,float16,fp8,8191,0.012583999335765839
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,4,128,1,float16,float16,16383,0.043463999032974245
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,4,128,1,float16,fp8,16383,0.015216000378131866
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,4,128,1,float16,float16,32767,0.06189119815826416
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,4,128,1,float16,fp8,32767,0.030444800853729248
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,4,128,1,float16,float16,1,0.02136480063199997
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,4,128,1,float16,fp8,1,0.010428799688816071
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,4,128,1,float16,float16,3,0.020908799767494202
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,4,128,1,float16,fp8,3,0.01138240024447441
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,4,128,1,float16,float16,7,0.02077919989824295
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,4,128,1,float16,fp8,63,0.010425599664449692
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,4,128,1,float16,fp8,7,0.010755199939012527
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,4,128,1,float16,float16,15,0.022511999309062957
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,4,128,1,float16,fp8,15,0.010470400005578995
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,4,128,1,float16,float16,31,0.021972799301147462
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,4,128,1,float16,fp8,31,0.011055999994277954
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,4,128,1,float16,float16,63,0.021142399311065672
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,4,128,1,float16,float16,127,0.020883199572563172
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,4,128,1,float16,float16,1023,0.0610912024974823
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,4,128,1,float16,fp8,127,0.010494399815797806
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,4,128,1,float16,float16,255,0.022758400440216063
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,4,128,1,float16,fp8,2047,0.04735040068626404
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,4,128,1,float16,fp8,255,0.01048479974269867
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,4,128,1,float16,float16,511,0.04107039868831634
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,4,128,1,float16,fp8,511,0.014619199931621552
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,4,128,1,float16,fp8,1023,0.029211199283599852
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,4,128,1,float16,float16,2047,0.09747999906539917
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,4,128,1,float16,float16,4095,0.17148159742355346
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,4,128,1,float16,fp8,4095,0.08343679904937744
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,4,128,1,float16,float16,8191,0.31856799125671387
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,4,128,1,float16,fp8,8191,0.15813119411468507
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,4,128,1,float16,float16,1,0.02701280117034912
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,4,128,1,float16,fp8,1,0.014584000408649444
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,4,128,1,float16,float16,3,0.02714560031890869
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,4,128,1,float16,fp8,7,0.014611199498176575
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,4,128,1,float16,fp8,3,0.014609600603580474
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,4,128,1,float16,float16,7,0.027000001072883605
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,4,128,1,float16,fp8,16383,0.3031775951385498
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,4,128,1,float16,float16,16383,0.6184607982635498
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,4,128,1,float16,float16,15,0.026820799708366393
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,4,128,1,float16,fp8,63,0.014580799639225006
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,4,128,1,float16,fp8,15,0.01467359960079193
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,4,128,1,float16,float16,31,0.026894399523735048
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,4,128,1,float16,fp8,31,0.014628799259662628
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,4,128,1,float16,float16,63,0.026955199241638184
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,4,128,1,float16,fp8,127,0.014632000029087067
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,4,128,1,float16,float16,127,0.02720479965209961
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,4,128,1,float16,float16,255,0.041947200894355774
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,4,128,1,float16,fp8,255,0.014524799585342408
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,4,128,1,float16,float16,511,0.060868799686431885
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,4,128,1,float16,fp8,511,0.028889599442481994
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,4,128,1,float16,float16,1023,0.0972544014453888
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,4,128,1,float16,fp8,1023,0.04763999879360199
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,4,128,1,float16,float16,2047,0.17053120136260985
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,4,128,1,float16,fp8,2047,0.08427199721336365
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,4,128,1,float16,float16,4095,0.3151295900344849
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,4,128,1,float16,float16,3,0.03734880089759827
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,4,128,1,float16,float16,1,0.03732160031795502
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,4,128,1,float16,fp8,7,0.022763200104236603
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,4,128,1,float16,fp8,1,0.02298399955034256
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,4,128,1,float16,fp8,4095,0.15873279571533203
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,4,128,1,float16,fp8,3,0.02290399968624115
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,4,128,1,float16,float16,7,0.03777920007705689
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,4,128,1,float16,fp8,15,0.022891199588775633
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,4,128,1,float16,float16,15,0.03810079991817474
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,4,128,1,float16,float16,31,0.038431999087333676
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,4,128,1,float16,fp8,31,0.022878399491310118
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,4,128,1,float16,float16,63,0.03919839859008789
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,4,128,1,float16,fp8,63,0.022780799865722658
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,4,128,1,float16,float16,127,0.0468639999628067
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,4,128,1,float16,fp8,127,0.02298399955034256
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,4,128,1,float16,float16,255,0.06265760064125062
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,4,128,1,float16,fp8,255,0.029292801022529603
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,4,128,1,float16,float16,511,0.09879519939422607
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,4,128,1,float16,fp8,511,0.047244799137115476
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,4,128,1,float16,float16,1023,0.17342079877853395
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,4,128,1,float16,fp8,1023,0.08419359922409057
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,4,128,1,float16,float16,1,0.016579200327396394
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,4,128,1,float16,fp8,1,0.008348800241947174
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,4,128,1,float16,float16,2047,0.3165440082550049
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,4,128,1,float16,fp8,2047,0.15728800296783446
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,4,128,1,float16,float16,3,0.016527999937534333
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,4,128,1,float16,float16,31,0.01653759926557541
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,4,128,1,float16,fp8,3,0.008376000076532364
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,4,128,1,float16,float16,7,0.016726399958133697
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,4,128,1,float16,fp8,7,0.008398400247097015
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,4,128,1,float16,float16,15,0.016676799952983858
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,4,128,1,float16,fp8,15,0.008441600203514098
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,4,128,1,float16,fp8,31,0.008726400136947633
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,4,128,1,float16,float16,63,0.0165583997964859
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,4,128,1,float16,fp8,63,0.008324799686670303
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,4,128,1,float16,float16,127,0.016940799355506898
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,4,128,1,float16,fp8,127,0.008446399867534638
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,4,128,1,float16,float16,255,0.01663520038127899
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,4,128,1,float16,fp8,255,0.00843520015478134
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,4,128,1,float16,float16,511,0.016539199650287627
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,4,128,1,float16,fp8,511,0.008387199789285659
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,4,128,1,float16,float16,1023,0.01675039976835251
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,4,128,1,float16,fp8,1023,0.008423999696969987
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,4,128,1,float16,float16,2047,0.0186256006360054
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,4,128,1,float16,fp8,2047,0.01016639992594719
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,4,128,1,float16,float16,4095,0.020787200331687926
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,4,128,1,float16,fp8,4095,0.010500799864530563
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,4,128,1,float16,float16,8191,0.04107199907302857
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,4,128,1,float16,fp8,8191,0.014878399670124054
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,4,4,128,1,float16,float16,3,0.05751360058784485
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,4,128,1,float16,float16,16383,0.058271998167037965
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,4,128,1,float16,fp8,16383,0.028672000765800475
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,4,128,1,float16,fp8,32767,0.0470335990190506
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,4,4,128,1,float16,float16,1,0.05759360194206238
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,4,4,128,1,float16,fp8,1,0.03914400041103363
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,4,128,1,float16,float16,32767,0.09451199769973755
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,4,4,128,1,float16,fp8,3,0.039822399616241455
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,4,4,128,1,float16,float16,7,0.05749599933624268
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,4,4,128,1,float16,fp8,7,0.03916159868240356
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,4,4,128,1,float16,fp8,63,0.03960640132427216
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,4,4,128,1,float16,float16,15,0.05758399963378906
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,4,4,128,1,float16,fp8,15,0.039164799451828006
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,4,4,128,1,float16,float16,31,0.059640002250671384
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,4,4,128,1,float16,fp8,31,0.03928160071372986
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,4,4,128,1,float16,float16,127,0.06769760251045227
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,4,4,128,1,float16,float16,63,0.06512960195541381
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,4,4,128,1,float16,fp8,127,0.041168001294136045
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,4,4,128,1,float16,float16,255,0.10005439519882202
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,4,4,128,1,float16,fp8,255,0.050040000677108766
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,4,4,128,1,float16,float16,511,0.17214239835739137
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,4,4,128,1,float16,fp8,511,0.08471519947052002
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,4,4,128,1,float16,float16,1,0.09972159862518311
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,4,4,128,1,float16,fp8,1,0.07195039987564086
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,4,4,128,1,float16,float16,1023,0.3169440031051636
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,4,4,128,1,float16,fp8,1023,0.15750240087509154
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,4,4,128,1,float16,float16,3,0.0992688000202179
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,4,4,128,1,float16,fp8,3,0.07225279808044434
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,4,4,128,1,float16,float16,7,0.0992031991481781
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,4,4,128,1,float16,fp8,31,0.07236959934234619
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,4,4,128,1,float16,fp8,7,0.0726527988910675
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,4,4,128,1,float16,float16,15,0.10476800203323364
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,4,4,128,1,float16,float16,127,0.1148095965385437
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,4,4,128,1,float16,fp8,15,0.07195680141448975
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,4,4,128,1,float16,float16,31,0.10877439975738526
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,4,4,128,1,float16,float16,63,0.10895680189132691
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,4,4,128,1,float16,fp8,255,0.091075199842453
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,4,4,128,1,float16,fp8,63,0.07398239970207214
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,4,4,128,1,float16,fp8,127,0.07417600154876709
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,4,4,128,1,float16,float16,255,0.1791983962059021
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,4,4,128,1,float16,float16,511,0.3232111930847168
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,4,128,1,float16,float16,1,0.01857600063085556
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,4,128,1,float16,fp8,1,0.00843840017914772
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,4,128,1,float16,float16,3,0.018588800728321076
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,4,4,128,1,float16,fp8,511,0.1592319965362549
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,4,128,1,float16,fp8,3,0.008417599648237229
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,4,128,1,float16,float16,7,0.0183119997382164
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,4,128,1,float16,fp8,7,0.008427199721336365
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,4,128,1,float16,float16,15,0.017977599799633027
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,4,128,1,float16,fp8,15,0.008336000144481659
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,4,128,1,float16,float16,31,0.018572799861431122
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,4,128,1,float16,fp8,31,0.00846719965338707
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,4,128,1,float16,float16,63,0.016628800332546233
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,4,128,1,float16,fp8,63,0.008363199979066848
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,4,128,1,float16,float16,127,0.018192000687122345
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,4,128,1,float16,fp8,1023,0.009780800342559815
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,4,128,1,float16,fp8,127,0.008537600189447403
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,4,128,1,float16,float16,255,0.01852799952030182
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,4,128,1,float16,float16,4095,0.04108479917049408
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,4,128,1,float16,fp8,255,0.008507200330495835
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,4,128,1,float16,fp8,4095,0.013624000549316406
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,4,128,1,float16,float16,511,0.01860480010509491
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,4,128,1,float16,fp8,511,0.008340799808502197
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,4,128,1,float16,float16,1023,0.018673600256443025
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,4,128,1,float16,float16,2047,0.020712000131607056
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,4,128,1,float16,fp8,2047,0.010711999982595444
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,4,128,1,float16,float16,8191,0.059412801265716554
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,4,128,1,float16,fp8,8191,0.028638398647308348
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,2,2,128,1,float16,float16,3,0.016616000235080718
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,4,128,1,float16,float16,16383,0.09598240256309509
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,4,128,1,float16,fp8,16383,0.04649440050125122
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,2,2,128,1,float16,float16,1,0.016663999855518342
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,4,128,1,float16,fp8,32767,0.08276000022888183
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,4,128,1,float16,float16,32767,0.16640959978103637
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,2,2,128,1,float16,fp8,1,0.00835679993033409
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,2,2,128,1,float16,float16,7,0.016945600509643555
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,2,2,128,1,float16,fp8,3,0.008353599905967712
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,2,2,128,1,float16,fp8,7,0.00835679993033409
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,2,2,128,1,float16,float16,15,0.016652800142765045
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,2,2,128,1,float16,fp8,15,0.008339200168848038
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,2,2,128,1,float16,float16,31,0.016606399416923524
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,2,2,128,1,float16,fp8,31,0.008379200100898742
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,2,2,128,1,float16,float16,63,0.017584000527858735
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,2,2,128,1,float16,fp8,63,0.008457600325345992
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,2,2,128,1,float16,float16,127,0.017372800409793852
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,2,2,128,1,float16,fp8,127,0.00828320011496544
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,2,2,128,1,float16,float16,255,0.01674720048904419
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,2,2,128,1,float16,fp8,255,0.008337599784135818
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,2,2,128,1,float16,float16,511,0.018590399622917177
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,2,2,128,1,float16,fp8,511,0.008347199857234954
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,2,2,128,1,float16,float16,1023,0.020628799498081208
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,2,2,128,1,float16,fp8,1023,0.010515200346708298
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,2,2,128,1,float16,float16,2047,0.04084959924221039
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,2,2,128,1,float16,fp8,2047,0.012608000636100769
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,2,2,128,1,float16,float16,4095,0.059278398752212524
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,2,2,128,1,float16,fp8,4095,0.028142398595809935
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,2,2,128,1,float16,float16,8191,0.09439039826393128
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,2,2,128,1,float16,fp8,8191,0.04596000015735626
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,2,2,128,1,float16,float16,16383,0.16622719764709473
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,2,2,128,1,float16,fp8,16383,0.08228960037231445
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,2,2,128,1,float16,float16,32767,0.3103584051132202
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,2,2,128,1,float16,float16,1,0.014683200418949128
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,2,2,128,1,float16,fp8,32767,0.15707199573516845
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,2,2,128,1,float16,float16,3,0.01598079949617386
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,2,2,128,1,float16,float16,7,0.016646400094032288
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,2,2,128,1,float16,float16,65535,0.5954976081848145
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,2,2,128,1,float16,float16,15,0.014815999567508698
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,2,2,128,1,float16,fp8,65535,0.30232479572296145
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,2,2,128,1,float16,float16,63,0.014555199444293976
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,2,2,128,1,float16,float16,31,0.01656319946050644
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,2,2,128,1,float16,float16,127,0.015223999321460725
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,2,2,128,1,float16,float16,255,0.014558400213718414
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,2,2,128,1,float16,fp8,1,0.008350399881601333
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,2,2,128,1,float16,float16,511,0.014504000544548035
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,2,2,128,1,float16,fp8,3,0.008363199979066848
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,2,2,128,1,float16,fp8,7,0.00841120034456253
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,2,2,128,1,float16,fp8,511,0.008558399975299835
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,2,2,128,1,float16,float16,1023,0.014535999298095703
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,2,2,128,1,float16,fp8,1023,0.008401600271463394
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,2,2,128,1,float16,fp8,15,0.008371199667453765
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,2,2,128,1,float16,float16,2047,0.015516799688339234
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,2,2,128,1,float16,fp8,127,0.00864799991250038
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,2,2,128,1,float16,fp8,2047,0.008486399799585343
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,2,2,128,1,float16,fp8,8191,0.012479999661445617
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,2,2,128,1,float16,fp8,31,0.008414400368928909
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,2,2,128,1,float16,float16,4095,0.01663520038127899
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,2,2,128,1,float16,fp8,63,0.008441600203514098
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,2,2,128,1,float16,fp8,4095,0.010516799986362457
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,2,2,128,1,float16,float16,8191,0.01873279958963394
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,2,2,128,1,float16,fp8,255,0.008406399935483932
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,2,2,128,1,float16,float16,16383,0.0225600004196167
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,2,2,128,1,float16,fp8,16383,0.016760000586509706
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,2,2,128,1,float16,float16,32767,0.026841598749160766
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,2,2,128,1,float16,float16,3,0.018619200587272643
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,2,2,128,1,float16,fp8,32767,0.01870400011539459
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,2,2,128,1,float16,fp8,3,0.008607999980449676
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,2,2,128,1,float16,float16,65535,0.029025599360466003
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,2,2,128,1,float16,fp8,65535,0.020534400641918183
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,2,2,128,1,float16,float16,1,0.01860159933567047
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,2,2,128,1,float16,fp8,1,0.008481600135564805
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,2,2,128,1,float16,float16,7,0.018632000684738158
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,2,2,128,1,float16,fp8,7,0.008408000320196151
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,2,2,128,1,float16,float16,15,0.018603199720382692
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,2,2,128,1,float16,fp8,15,0.008499199897050858
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,2,2,128,1,float16,float16,31,0.016641600430011748
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,2,2,128,1,float16,fp8,31,0.008534400165081025
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,2,2,128,1,float16,float16,63,0.01855199933052063
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,2,2,128,1,float16,fp8,63,0.008561599999666214
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,2,2,128,1,float16,float16,127,0.017643199861049653
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,2,2,128,1,float16,fp8,127,0.008428800106048583
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,2,2,128,1,float16,float16,255,0.018622399866580965
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,2,2,128,1,float16,float16,2047,0.018697600066661834
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,2,2,128,1,float16,fp8,255,0.008392000198364257
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,2,2,128,1,float16,float16,511,0.018667200207710268
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,2,2,128,1,float16,fp8,511,0.008436799794435502
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,2,2,128,1,float16,float16,1023,0.01867839992046356
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,2,2,128,1,float16,fp8,1023,0.008686400204896926
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,2,2,128,1,float16,fp8,16383,0.014496000111103058
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,2,2,128,1,float16,fp8,2047,0.010409600287675857
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,2,2,128,1,float16,float16,4095,0.02064799964427948
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,2,2,128,1,float16,fp8,4095,0.010532800108194351
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,2,2,128,1,float16,float16,8191,0.022679999470710754
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,2,2,128,1,float16,fp8,8191,0.01252799928188324
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,2,2,128,1,float16,float16,16383,0.024753600358963013
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,2,2,128,1,float16,float16,32767,0.026868799328804018
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,2,2,128,1,float16,fp8,32767,0.01650879979133606
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,2,2,128,1,float16,float16,65535,0.048056000471115114
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,2,2,128,1,float16,fp8,65535,0.018593600392341612
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,2,2,128,1,float16,float16,1,0.01674239933490753
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,2,2,128,1,float16,fp8,1,0.008327999711036682
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,2,2,128,1,float16,float16,3,0.016684800386428833
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,2,2,128,1,float16,fp8,3,0.008339200168848038
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,2,2,128,1,float16,float16,7,0.016612799465656282
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,2,2,128,1,float16,fp8,7,0.00835999995470047
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,2,2,128,1,float16,float16,15,0.0186831995844841
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,2,2,128,1,float16,fp8,15,0.008495999872684479
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,2,2,128,1,float16,float16,31,0.016595199704170227
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,2,2,128,1,float16,fp8,31,0.00843999981880188
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,2,2,128,1,float16,float16,63,0.01661120057106018
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,2,2,128,1,float16,fp8,63,0.008355200290679932
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,2,2,128,1,float16,float16,127,0.017584000527858735
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,2,2,128,1,float16,fp8,127,0.008451200276613235
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,2,2,128,1,float16,float16,255,0.01757279932498932
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,2,2,128,1,float16,fp8,255,0.008542399853467941
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,2,2,128,1,float16,float16,511,0.01706240028142929
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,2,2,128,1,float16,fp8,4095,0.013891200721263885
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,2,2,128,1,float16,fp8,511,0.008401600271463394
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,2,2,128,1,float16,float16,1023,0.01878879964351654
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,2,2,128,1,float16,fp8,1023,0.00856959968805313
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,2,2,128,1,float16,float16,2047,0.020652799308300017
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,2,2,128,1,float16,fp8,2047,0.010552000254392624
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,2,2,128,1,float16,float16,4095,0.041099199652671815
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,2,2,128,1,float16,float16,8191,0.05942400097846985
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,2,2,128,1,float16,fp8,32767,0.08249760270118714
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,2,2,128,1,float16,fp8,8191,0.029051199555397034
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,2,2,128,1,float16,float16,16383,0.09484320282936096
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,2,2,128,1,float16,fp8,16383,0.04594399929046631
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,2,2,128,1,float16,float16,32767,0.16663520336151122
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,2,2,128,1,float16,float16,1,0.016548800468444824
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,2,2,128,1,float16,fp8,1,0.008523199707269669
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,2,2,128,1,float16,float16,3,0.018636800348758698
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,2,2,128,1,float16,float16,65535,0.3106911897659302
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,2,2,128,1,float16,fp8,65535,0.15752799510955812
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,2,2,128,1,float16,fp8,3,0.008390399813652038
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,2,2,128,1,float16,float16,7,0.016590400040149687
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,2,2,128,1,float16,fp8,7,0.008425600081682205
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,2,2,128,1,float16,float16,15,0.016663999855518342
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,2,2,128,1,float16,fp8,15,0.008336000144481659
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,2,2,128,1,float16,float16,31,0.01867839992046356
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,2,2,128,1,float16,fp8,31,0.008491200208663941
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,2,2,128,1,float16,float16,63,0.018705600500106813
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,2,2,128,1,float16,fp8,63,0.008505599945783615
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,2,2,128,1,float16,float16,127,0.016710400581359863
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,2,2,128,1,float16,fp8,127,0.00854559987783432
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,2,2,128,1,float16,float16,255,0.01685599982738495
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,2,2,128,1,float16,fp8,255,0.008369600027799606
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,2,2,128,1,float16,float16,511,0.018648000061511995
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,2,2,128,1,float16,fp8,511,0.008495999872684479
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,2,2,128,1,float16,float16,1023,0.018603199720382692
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,2,2,128,1,float16,fp8,1023,0.008636800199747085
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,2,2,128,1,float16,float16,2047,0.018811200559139252
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,2,2,128,1,float16,fp8,2047,0.010103999823331832
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,2,2,128,1,float16,float16,4095,0.01945600062608719
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,2,2,128,1,float16,fp8,4095,0.010569600015878677
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,2,2,128,1,float16,float16,8191,0.02266400009393692
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,2,2,128,1,float16,fp8,8191,0.01247360035777092
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,2,2,128,1,float16,float16,16383,0.024774399399757386
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,2,2,128,1,float16,fp8,16383,0.013251200318336487
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,2,2,128,1,float16,float16,32767,0.0443231999874115
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,2,2,128,1,float16,fp8,32767,0.01656000018119812
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,2,2,128,1,float16,float16,65535,0.06334239840507508
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,2,2,128,1,float16,float16,7,0.018619200587272643
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,2,2,128,1,float16,fp8,65535,0.03271839916706085
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,2,2,128,1,float16,float16,1,0.018670399487018586
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,2,2,128,1,float16,fp8,1,0.008489599823951722
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,2,2,128,1,float16,float16,3,0.01867839992046356
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,2,2,128,1,float16,fp8,3,0.00843520015478134
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,2,2,128,1,float16,fp8,7,0.008377599716186523
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,2,2,128,1,float16,float16,15,0.018700799345970152
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,2,2,128,1,float16,fp8,15,0.00844319984316826
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,2,2,128,1,float16,float16,31,0.0186831995844841
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,2,2,128,1,float16,fp8,31,0.008508799970149994
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,2,2,128,1,float16,float16,63,0.01867839992046356
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,2,2,128,1,float16,fp8,63,0.008463999629020691
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,2,2,128,1,float16,float16,127,0.01887200027704239
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,2,2,128,1,float16,fp8,127,0.008721599727869034
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,2,2,128,1,float16,float16,255,0.018743999302387238
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,2,2,128,1,float16,fp8,255,0.008423999696969987
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,2,2,128,1,float16,float16,2047,0.0595088005065918
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,2,2,128,1,float16,fp8,2047,0.028380799293518066
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,2,2,128,1,float16,float16,511,0.02106720060110092
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,2,2,128,1,float16,fp8,511,0.010521599650382995
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,2,2,128,1,float16,float16,1023,0.041176000237464906
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,2,2,128,1,float16,fp8,1023,0.01348160058259964
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,2,2,128,1,float16,float16,4095,0.09473440051078796
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,2,2,128,1,float16,fp8,4095,0.04704799950122833
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,2,2,128,1,float16,float16,8191,0.16713119745254518
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,2,2,128,1,float16,fp8,8191,0.08508800268173218
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,2,2,128,1,float16,float16,16383,0.3122688055038452
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,2,2,128,1,float16,fp8,16383,0.15986080169677735
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,2,2,128,1,float16,float16,1,0.02263039946556091
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,2,2,128,1,float16,fp8,1,0.011105599999427795
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,2,2,128,1,float16,float16,3,0.022830399870872497
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,2,2,128,1,float16,fp8,3,0.010500799864530563
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,2,2,128,1,float16,float16,7,0.022782400250434875
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,2,2,128,1,float16,fp8,7,0.010502400249242783
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,2,2,128,1,float16,float16,32767,0.6020080089569092
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,2,2,128,1,float16,float16,15,0.02280319929122925
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,2,2,128,1,float16,fp8,32767,0.3119760036468506
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,2,2,128,1,float16,fp8,15,0.010444799810647965
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,2,2,128,1,float16,float16,31,0.022673599421977997
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,2,2,128,1,float16,fp8,31,0.0105103999376297
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,2,2,128,1,float16,fp8,255,0.010468800365924836
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,2,2,128,1,float16,float16,63,0.022649599611759184
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,2,2,128,1,float16,float16,127,0.022711999714374542
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,2,2,128,1,float16,fp8,63,0.010499200224876404
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,2,2,128,1,float16,fp8,127,0.011007999628782272
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,2,2,128,1,float16,float16,255,0.022761599719524385
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,2,2,128,1,float16,float16,511,0.04168800115585327
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,2,2,128,1,float16,fp8,511,0.014500799775123595
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,2,2,128,1,float16,float16,4095,0.17132480144500734
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,2,2,128,1,float16,float16,1023,0.06077920198440552
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,2,2,128,1,float16,fp8,1023,0.028907200694084166
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,2,2,128,1,float16,float16,1,0.028281599283218384
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,2,2,128,1,float16,float16,2047,0.0976207971572876
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,2,2,128,1,float16,fp8,2047,0.04725280106067657
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,2,2,128,1,float16,float16,7,0.028911998867988585
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,2,2,128,1,float16,fp8,4095,0.08391519784927368
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,2,2,128,1,float16,fp8,1,0.015043200552463531
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,2,2,128,1,float16,float16,3,0.028854399919509888
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,2,2,128,1,float16,float16,8191,0.32085280418395995
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,2,2,128,1,float16,fp8,8191,0.1579360008239746
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,2,2,128,1,float16,fp8,3,0.014985600113868713
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,2,2,128,1,float16,fp8,7,0.014699199795722961
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,2,2,128,1,float16,float16,15,0.028828799724578857
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,2,2,128,1,float16,fp8,15,0.0147024005651474
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,2,2,128,1,float16,float16,31,0.028811201453208923
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,2,2,128,1,float16,fp8,255,0.015123200416564942
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,2,2,128,1,float16,fp8,31,0.014662399888038635
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,2,2,128,1,float16,float16,63,0.02892000079154968
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,2,2,128,1,float16,fp8,63,0.01459839940071106
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,2,2,128,1,float16,float16,127,0.02880159914493561
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,2,2,128,1,float16,fp8,127,0.014521600306034088
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,2,2,128,1,float16,float16,255,0.04301599860191345
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,2,2,128,1,float16,float16,511,0.06264320015907288
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,2,2,128,1,float16,fp8,511,0.028814399242401124
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,2,2,128,1,float16,float16,1023,0.09811519980430602
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,2,2,128,1,float16,fp8,1023,0.04760160148143768
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,2,2,128,1,float16,float16,3,0.018572799861431122
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,2,2,128,1,float16,float16,2047,0.17121920585632325
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,2,2,128,1,float16,fp8,2047,0.0858560025691986
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,2,2,128,1,float16,float16,1,0.018116800487041472
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,2,2,128,1,float16,fp8,1,0.00843840017914772
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,2,2,128,1,float16,float16,4095,0.31525440216064454
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,2,2,128,1,float16,fp8,4095,0.15986239910125732
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,2,2,128,1,float16,fp8,3,0.00846880003809929
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,2,2,128,1,float16,float16,7,0.016655999422073364
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,2,2,128,1,float16,fp8,7,0.00843520015478134
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,2,2,128,1,float16,float16,15,0.01858240067958832
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,2,2,128,1,float16,fp8,15,0.00844319984316826
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,2,2,128,1,float16,float16,31,0.018587200343608855
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,2,2,128,1,float16,fp8,31,0.008568000048398972
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,2,2,128,1,float16,float16,63,0.016527999937534333
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,2,2,128,1,float16,fp8,63,0.008417599648237229
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,2,2,128,1,float16,float16,127,0.01666080057621002
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,2,2,128,1,float16,fp8,127,0.008472000062465668
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,2,2,128,1,float16,float16,255,0.01855680048465729
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,2,2,128,1,float16,fp8,255,0.008529599756002426
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,2,2,128,1,float16,float16,511,0.018590399622917177
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,2,2,128,1,float16,fp8,511,0.008640000224113464
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,2,2,128,1,float16,float16,1023,0.01857440024614334
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,2,2,128,1,float16,fp8,1023,0.008515200018882752
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,2,2,128,1,float16,float16,2047,0.018705600500106813
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,2,2,128,1,float16,fp8,2047,0.010462400317192078
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,2,2,128,1,float16,fp8,16383,0.01541920006275177
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,2,2,128,1,float16,float16,4095,0.020652799308300017
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,2,2,128,1,float16,fp8,4095,0.010526400059461594
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,2,2,128,1,float16,float16,8191,0.02282879948616028
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,2,2,128,1,float16,fp8,8191,0.012518399953842163
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,2,2,128,1,float16,float16,16383,0.0432096004486084
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,2,2,128,1,float16,float16,32767,0.06340479850769043
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,2,2,128,1,float16,fp8,32767,0.030972799658775328
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,2,2,128,1,float16,float16,65535,0.09664480090141296
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,2,2,128,1,float16,fp8,65535,0.04938879907131195
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,2,2,128,1,float16,float16,1,0.03727520108222961
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,2,2,128,1,float16,fp8,1,0.02473919987678528
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,2,2,128,1,float16,float16,3,0.03718079924583435
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,2,2,128,1,float16,fp8,3,0.023559999465942384
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,2,2,128,1,float16,float16,7,0.03755680024623871
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,2,2,128,1,float16,fp8,7,0.02399519979953766
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,2,2,128,1,float16,float16,15,0.038236799836158755
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,2,2,128,1,float16,fp8,15,0.024027200043201448
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,2,2,128,1,float16,float16,31,0.038673600554466246
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,2,2,128,1,float16,fp8,31,0.024905599653720856
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,2,2,128,1,float16,float16,255,0.06204959750175476
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,2,2,128,1,float16,float16,63,0.03809759914875031
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,2,2,128,1,float16,fp8,63,0.02497120052576065
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,2,2,128,1,float16,float16,127,0.04611999988555908
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,2,2,128,1,float16,fp8,127,0.024545599520206452
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,2,2,128,1,float16,fp8,255,0.0303631991147995
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,2,2,128,1,float16,fp8,511,0.04778240025043488
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,2,2,128,1,float16,float16,511,0.09851999878883362
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,2,2,128,1,float16,float16,1023,0.17040959596633912
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,2,2,128,1,float16,fp8,1023,0.08482879996299744
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,2,2,128,1,float16,float16,1,0.05751199722290039
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,2,2,128,1,float16,fp8,1,0.04112800061702728
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,2,2,128,1,float16,fp8,2047,0.15750399827957154
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,2,2,128,1,float16,float16,2047,0.31572000980377196
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,2,2,128,1,float16,float16,3,0.05761119723320007
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,2,2,128,1,float16,fp8,3,0.0411296010017395
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,2,2,128,1,float16,float16,7,0.05752639770507813
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,2,2,128,1,float16,fp8,7,0.0412992000579834
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,2,2,128,1,float16,fp8,63,0.041140800714492796
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,2,2,128,1,float16,float16,15,0.05764960050582886
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,2,2,128,1,float16,fp8,15,0.04115679860115051
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,2,2,128,1,float16,float16,31,0.05995839834213257
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,2,2,128,1,float16,fp8,31,0.041300800442695615
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,2,2,128,1,float16,float16,63,0.06392319798469544
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,2,2,128,1,float16,fp8,127,0.0431088000535965
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,2,2,128,1,float16,float16,511,0.17155840396881103
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,2,2,128,1,float16,float16,127,0.06780959963798523
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,2,2,128,1,float16,float16,1,0.016540800034999848
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,2,2,128,1,float16,float16,255,0.09950559735298156
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,2,2,128,1,float16,fp8,255,0.05146239995956421
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,2,2,128,1,float16,fp8,511,0.08524640202522278
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,2,2,128,1,float16,fp8,1,0.008425600081682205
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,2,2,128,1,float16,fp8,3,0.008459199965000153
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,2,2,128,1,float16,float16,3,0.016598400473594666
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,2,2,128,1,float16,float16,1023,0.3148432016372681
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,2,2,128,1,float16,fp8,1023,0.15879039764404296
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,2,2,128,1,float16,float16,7,0.01658560037612915
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,2,2,128,1,float16,fp8,7,0.008377599716186523
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,2,2,128,1,float16,float16,15,0.016646400094032288
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,2,2,128,1,float16,fp8,15,0.008460800349712371
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,2,2,128,1,float16,float16,31,0.016569599509239197
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,2,2,128,1,float16,fp8,31,0.008366400003433227
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,2,2,128,1,float16,float16,63,0.016574400663375854
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,2,2,128,1,float16,fp8,63,0.00835840031504631
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,2,2,128,1,float16,float16,127,0.016630400717258454
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,2,2,128,1,float16,fp8,127,0.008436799794435502
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,2,2,128,1,float16,float16,255,0.016756799817085267
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,2,2,128,1,float16,fp8,2047,0.008961600065231324
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,2,2,128,1,float16,fp8,255,0.008340799808502197
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,2,2,128,1,float16,float16,511,0.01658719927072525
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,2,2,128,1,float16,fp8,511,0.00833280012011528
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,2,2,128,1,float16,float16,1023,0.016630400717258454
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,2,2,128,1,float16,fp8,1023,0.008531200140714646
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,2,2,128,1,float16,float16,2047,0.01876640021800995
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,2,2,128,1,float16,fp8,16383,0.02892799973487854
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,2,2,128,1,float16,float16,4095,0.020656000077724456
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,2,2,128,1,float16,fp8,4095,0.010406400263309478
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,2,2,128,1,float16,float16,8191,0.03913759887218475
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,1,1,128,1,float16,float16,1,0.017262400686740877
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,2,2,128,1,float16,fp8,8191,0.013278399407863618
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,2,2,128,1,float16,float16,16383,0.05824480056762695
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,2,2,128,1,float16,float16,32767,0.0942911982536316
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,2,2,128,1,float16,fp8,32767,0.04726240038871765
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,2,2,128,1,float16,float16,65535,0.16687999963760375
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,2,2,128,1,float16,fp8,65535,0.08384479880332947
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,1,1,128,1,float16,fp8,1,0.008348800241947174
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,1,1,128,1,float16,float16,3,0.018603199720382692
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,1,1,128,1,float16,fp8,3,0.008436799794435502
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,1,1,128,1,float16,float16,63,0.017078399658203125
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,1,1,128,1,float16,float16,7,0.018680000305175783
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,1,1,128,1,float16,float16,15,0.016673600673675536
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,1,1,128,1,float16,fp8,7,0.008436799794435502
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,1,1,128,1,float16,fp8,15,0.008449599891901017
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,1,1,128,1,float16,float16,31,0.017392000555992125
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,1,1,128,1,float16,fp8,31,0.00841120034456253
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,1,1,128,1,float16,float16,127,0.01849440038204193
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,1,1,128,1,float16,fp8,63,0.008460800349712371
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,1,1,128,1,float16,fp8,127,0.008393599838018417
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,1,1,128,1,float16,float16,255,0.016607999801635742
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,1,1,128,1,float16,fp8,255,0.008448000252246856
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,1,1,128,1,float16,float16,511,0.018611200153827667
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,1,1,128,1,float16,fp8,511,0.008495999872684479
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,1,1,128,1,float16,float16,1023,0.018607999384403228
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,1,1,128,1,float16,fp8,1023,0.009996800124645234
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,1,1,128,1,float16,float16,2047,0.021427200734615327
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,1,1,128,1,float16,fp8,2047,0.010492800176143647
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,1,1,128,1,float16,float16,4095,0.04118239879608154
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,1,1,128,1,float16,fp8,4095,0.014190399646759033
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,1,1,128,1,float16,float16,8191,0.05950239896774292
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,1,1,128,1,float16,fp8,8191,0.028918400406837463
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,1,1,128,1,float16,float16,16383,0.09498559832572936
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,1,1,128,1,float16,fp8,16383,0.04662080109119415
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,1,1,128,1,float16,float16,32767,0.16642240285873414
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,1,1,128,1,float16,fp8,32767,0.08234239816665649
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,1,1,128,1,float16,float16,65535,0.3096656084060669
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,1,1,128,1,float16,fp8,65535,0.15496159791946412
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,1,1,128,1,float16,float16,1,0.012540799379348756
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,1,1,128,1,float16,float16,3,0.012571200728416443
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,1,1,128,1,float16,float16,131071,0.5970895767211915
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,1,1,128,1,float16,fp8,131071,0.298857593536377
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,1,1,128,1,float16,float16,7,0.014617599546909332
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,1,1,128,1,float16,float16,15,0.013152000308036805
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,1,1,128,1,float16,float16,31,0.01348000019788742
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,1,1,128,1,float16,float16,63,0.014507199823856353
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,1,1,128,1,float16,float16,127,0.012854400277137756
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,1,1,128,1,float16,float16,255,0.014638400077819825
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,1,1,128,1,float16,fp8,1,0.00841120034456253
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,1,1,128,1,float16,fp8,255,0.008414400368928909
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,1,1,128,1,float16,float16,511,0.014579200744628906
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,1,1,128,1,float16,fp8,511,0.008452799916267396
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,1,1,128,1,float16,float16,1023,0.01451839953660965
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,1,1,128,1,float16,fp8,3,0.008460800349712371
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,1,1,128,1,float16,fp8,1023,0.008476799726486206
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,1,1,128,1,float16,float16,2047,0.014499199390411378
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,1,1,128,1,float16,fp8,7,0.008372800052165985
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,1,1,128,1,float16,fp8,2047,0.008716800063848496
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,1,1,128,1,float16,float16,4095,0.01664479970932007
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,1,1,128,1,float16,fp8,15,0.008377599716186523
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,1,1,128,1,float16,fp8,4095,0.010385599732398988
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,1,1,128,1,float16,fp8,31,0.008388800173997879
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,1,1,128,1,float16,float16,8191,0.01759839951992035
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,1,1,128,1,float16,fp8,8191,0.012478400021791458
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,1,1,128,1,float16,fp8,63,0.008329600095748901
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,1,1,128,1,float16,float16,16383,0.022808000445365906
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,1,1,128,1,float16,fp8,127,0.008473599702119828
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,1,1,128,1,float16,fp8,16383,0.016641600430011748
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,1,1,128,1,float16,float16,1,0.01456640064716339
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,1,1,128,1,float16,float16,32767,0.030244800448417663
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,1,1,128,1,float16,fp8,32767,0.02473440021276474
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,1,1,128,1,float16,float16,65535,0.030982398986816408
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,1,1,128,1,float16,fp8,65535,0.024935999512672426
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,1,1,128,1,float16,float16,131071,0.0329039990901947
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,1,1,128,1,float16,float16,15,0.016551999747753142
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,1,1,128,1,float16,fp8,131071,0.026759999990463256
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,1,1,128,1,float16,float16,31,0.016606399416923524
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,1,1,128,1,float16,fp8,1,0.008363199979066848
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,1,1,128,1,float16,float16,3,0.014564800262451171
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,1,1,128,1,float16,fp8,3,0.008372800052165985
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,1,1,128,1,float16,float16,7,0.015089599788188935
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,1,1,128,1,float16,fp8,7,0.008382400125265121
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,1,1,128,1,float16,float16,255,0.014564800262451171
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,1,1,128,1,float16,fp8,15,0.008448000252246856
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,1,1,128,1,float16,fp8,31,0.008614400029182434
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,1,1,128,1,float16,float16,63,0.016705599427223206
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,1,1,128,1,float16,fp8,63,0.00849440023303032
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,1,1,128,1,float16,float16,127,0.014560000598430633
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,1,1,128,1,float16,fp8,127,0.008352000266313553
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,1,1,128,1,float16,fp8,255,0.008430399745702744
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,1,1,128,1,float16,float16,511,0.015483200550079346
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,1,1,128,1,float16,fp8,511,0.008376000076532364
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,1,1,128,1,float16,float16,1023,0.016568000614643096
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,1,1,128,1,float16,fp8,1023,0.00867839977145195
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,1,1,128,1,float16,float16,2047,0.01669120043516159
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,1,1,128,1,float16,fp8,2047,0.008656000345945358
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,1,1,128,1,float16,float16,4095,0.016683200001716615
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,1,1,128,1,float16,fp8,4095,0.010382399708032609
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,1,1,128,1,float16,float16,8191,0.018670399487018586
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,1,1,128,1,float16,fp8,8191,0.012468799948692322
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,1,1,128,1,float16,float16,16383,0.024852800369262695
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,1,1,128,1,float16,fp8,16383,0.016582399606704712
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,1,1,128,1,float16,float16,32767,0.026659199595451356
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,1,1,128,1,float16,fp8,1,0.008342400193214417
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,1,1,128,1,float16,fp8,32767,0.018671999871730804
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,1,1,128,1,float16,float16,65535,0.028497600555419923
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,1,1,128,1,float16,fp8,65535,0.020139199495315552
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,1,1,128,1,float16,float16,131071,0.04903199970722198
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,1,1,128,1,float16,fp8,131071,0.02279040068387985
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,1,1,128,1,float16,float16,1,0.01672479957342148
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,1,1,128,1,float16,float16,3,0.01666239947080612
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,1,1,128,1,float16,fp8,3,0.0083856001496315
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,1,1,128,1,float16,float16,63,0.01659359931945801
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,1,1,128,1,float16,float16,7,0.0166703999042511
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,1,1,128,1,float16,fp8,7,0.008455999940633774
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,1,1,128,1,float16,float16,15,0.016579200327396394
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,1,1,128,1,float16,fp8,15,0.008337599784135818
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,1,1,128,1,float16,float16,31,0.01664479970932007
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,1,1,128,1,float16,fp8,31,0.008320000022649765
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,1,1,128,1,float16,fp8,63,0.008347199857234954
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,1,1,128,1,float16,fp8,511,0.008425600081682205
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,1,1,128,1,float16,float16,127,0.016785599291324615
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,1,1,128,1,float16,fp8,127,0.008339200168848038
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,1,1,128,1,float16,float16,2047,0.018716800212860107
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,1,1,128,1,float16,float16,255,0.0165583997964859
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,1,1,128,1,float16,fp8,255,0.008312000334262848
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,1,1,128,1,float16,float16,511,0.01656160056591034
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,1,1,128,1,float16,float16,1023,0.016652800142765045
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,1,1,128,1,float16,fp8,1023,0.00840959995985031
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,1,1,128,1,float16,fp8,2047,0.010380800068378448
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,1,1,128,1,float16,float16,4095,0.020734399557113647
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,1,1,128,1,float16,fp8,4095,0.010718400031328202
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,1,1,128,1,float16,float16,8191,0.03935840129852295
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,1,1,128,1,float16,fp8,32767,0.04629920125007629
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,1,1,128,1,float16,fp8,8191,0.01395840048789978
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,1,1,128,1,float16,float16,16383,0.05881919860839844
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,1,1,128,1,float16,fp8,16383,0.028560000658035278
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,1,1,128,1,float16,float16,32767,0.09583680033683777
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,1,1,128,1,float16,fp8,1,0.008343999832868576
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,1,1,128,1,float16,float16,65535,0.16796640157699586
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,1,1,128,1,float16,fp8,65535,0.08290560245513916
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,1,1,128,1,float16,float16,1,0.018590399622917177
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,1,1,128,1,float16,float16,3,0.01671680063009262
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,1,1,128,1,float16,float16,131071,0.3092319965362549
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,1,1,128,1,float16,fp8,131071,0.1598896026611328
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,1,1,128,1,float16,fp8,3,0.008363199979066848
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,1,1,128,1,float16,float16,7,0.018697600066661834
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,1,1,128,1,float16,fp8,7,0.008393599838018417
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,1,1,128,1,float16,float16,15,0.01857600063085556
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,1,1,128,1,float16,fp8,15,0.008427199721336365
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,1,1,128,1,float16,float16,31,0.01671680063009262
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,1,1,128,1,float16,fp8,31,0.008371199667453765
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,1,1,128,1,float16,float16,63,0.016843199729919434
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,1,1,128,1,float16,fp8,63,0.008363199979066848
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,1,1,128,1,float16,float16,127,0.01866080015897751
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,1,1,128,1,float16,fp8,127,0.008451200276613235
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,1,1,128,1,float16,float16,255,0.018662400543689728
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,1,1,128,1,float16,fp8,255,0.00836160033941269
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,1,1,128,1,float16,float16,511,0.016790400445461272
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,1,1,128,1,float16,fp8,511,0.008427199721336365
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,1,1,128,1,float16,float16,1023,0.01860000044107437
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,1,1,128,1,float16,fp8,1023,0.008352000266313553
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,1,1,128,1,float16,float16,2047,0.018676799535751343
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,1,1,128,1,float16,fp8,2047,0.008449599891901017
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,1,1,128,1,float16,float16,4095,0.019865599274635316
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,1,1,128,1,float16,fp8,4095,0.010505600273609162
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,1,1,128,1,float16,float16,8191,0.020787200331687926
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,1,1,128,1,float16,fp8,8191,0.012564800679683685
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,1,1,128,1,float16,float16,16383,0.023427200317382813
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,1,1,128,1,float16,fp8,16383,0.014510400593280792
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,1,1,128,1,float16,float16,32767,0.026892799139022826
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,1,1,128,1,float16,fp8,32767,0.016459199786186218
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,1,1,128,1,float16,float16,65535,0.04724319875240326
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,1,1,128,1,float16,fp8,65535,0.01873279958963394
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,1,1,128,1,float16,float16,131071,0.06586880087852479
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,1,1,128,1,float16,fp8,131071,0.0351936012506485
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,1,1,128,1,float16,float16,1,0.016755199432373045
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,1,1,128,1,float16,fp8,1,0.008337599784135818
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,1,1,128,1,float16,float16,3,0.017257599532604216
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,1,1,128,1,float16,fp8,3,0.008363199979066848
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,1,1,128,1,float16,float16,7,0.017158399522304534
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,1,1,128,1,float16,fp8,63,0.008372800052165985
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,1,1,128,1,float16,fp8,7,0.008428800106048583
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,1,1,128,1,float16,float16,15,0.01664319932460785
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,1,1,128,1,float16,fp8,15,0.008336000144481659
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,1,1,128,1,float16,float16,31,0.01661760061979294
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,1,1,128,1,float16,fp8,31,0.00835999995470047
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,1,1,128,1,float16,fp8,511,0.008449599891901017
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,1,1,128,1,float16,float16,63,0.018539200723171233
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,1,1,128,1,float16,fp8,127,0.008364800363779068
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,1,1,128,1,float16,float16,127,0.01855359971523285
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,1,1,128,1,float16,fp8,2047,0.012643200159072877
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,1,1,128,1,float16,float16,255,0.01812160015106201
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,1,1,128,1,float16,fp8,255,0.008414400368928909
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,1,1,128,1,float16,float16,511,0.018588800728321076
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,1,1,128,1,float16,fp8,1023,0.01043199971318245
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,1,1,128,1,float16,float16,1023,0.020761600136756896
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,1,1,128,1,float16,float16,2047,0.04026240110397339
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,1,1,128,1,float16,float16,4095,0.059592002630233766
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,1,1,128,1,float16,fp8,4095,0.028091201186180116
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,1,1,128,1,float16,float16,8191,0.09502720236778259
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,1,1,128,1,float16,fp8,8191,0.04632480144500732
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,1,1,128,1,float16,float16,16383,0.16727999448776246
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,1,1,128,1,float16,fp8,16383,0.08237919807434083
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,1,1,128,1,float16,float16,32767,0.30952959060668944
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,1,1,128,1,float16,float16,1,0.01876319944858551
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,1,1,128,1,float16,fp8,32767,0.1542639970779419
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,1,1,128,1,float16,fp8,1,0.008448000252246856
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,1,1,128,1,float16,float16,3,0.019006399810314177
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,1,1,128,1,float16,fp8,3,0.008664000034332275
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,1,1,128,1,float16,float16,7,0.018804800510406495
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,1,1,128,1,float16,fp8,7,0.008430399745702744
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,1,1,128,1,float16,fp8,65535,0.2987648010253906
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,1,1,128,1,float16,fp8,63,0.008377599716186523
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,1,1,128,1,float16,float16,65535,0.5966896057128906
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,1,1,128,1,float16,float16,15,0.018699200451374055
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,1,1,128,1,float16,fp8,15,0.008564800024032593
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,1,1,128,1,float16,float16,255,0.019116799533367156
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,1,1,128,1,float16,float16,31,0.020230400562286376
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,1,1,128,1,float16,fp8,31,0.00926079973578453
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,1,1,128,1,float16,float16,63,0.018726399540901183
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,1,1,128,1,float16,float16,1023,0.041289600729942325
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,1,1,128,1,float16,float16,127,0.02064319998025894
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,1,1,128,1,float16,fp8,127,0.008436799794435502
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,1,1,128,1,float16,float16,511,0.02282399982213974
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,1,1,128,1,float16,fp8,255,0.008446399867534638
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,1,1,128,1,float16,fp8,511,0.010489600151777268
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,1,1,128,1,float16,fp8,1023,0.013673600554466248
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,1,1,128,1,float16,float16,2047,0.061108797788619995
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,1,1,128,1,float16,fp8,2047,0.028812798857688903
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,1,1,128,1,float16,float16,4095,0.09689279794692993
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,1,1,128,1,float16,fp8,4095,0.04724319875240326
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,1,1,128,1,float16,float16,8191,0.16862399578094484
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,1,1,128,1,float16,fp8,8191,0.08325759768486023
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,1,1,128,1,float16,float16,1,0.02272319942712784
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,1,1,128,1,float16,float16,7,0.022793599963188173
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,1,1,128,1,float16,fp8,1,0.01247360035777092
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,1,1,128,1,float16,float16,16383,0.31415839195251466
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,1,1,128,1,float16,fp8,16383,0.15835039615631102
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,1,1,128,1,float16,float16,3,0.022881600260734557
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,1,1,128,1,float16,float16,31,0.023636800050735474
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,1,1,128,1,float16,fp8,3,0.012135999649763108
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,1,1,128,1,float16,fp8,7,0.010908800363540649
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,1,1,128,1,float16,float16,15,0.022708800435066224
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,1,1,128,1,float16,fp8,15,0.010896000266075134
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,1,1,128,1,float16,fp8,31,0.011036799848079681
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,1,1,128,1,float16,float16,63,0.022944000363349915
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,1,1,128,1,float16,fp8,63,0.012478400021791458
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,1,1,128,1,float16,float16,127,0.023180800676345825
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,1,1,128,1,float16,fp8,127,0.011086399853229522
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,1,1,128,1,float16,float16,255,0.023219199478626253
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,1,1,128,1,float16,fp8,255,0.011020799726247787
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,1,1,128,1,float16,float16,511,0.043243199586868286
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,1,1,128,1,float16,fp8,511,0.014455999433994293
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,1,1,128,1,float16,float16,1023,0.06164000034332275
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,1,1,128,1,float16,float16,4095,0.17322560548782348
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,1,1,128,1,float16,fp8,1023,0.029110398888587952
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,1,1,128,1,float16,float16,2047,0.0993776023387909
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,1,1,128,1,float16,fp8,2047,0.04735040068626404
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,1,1,128,1,float16,fp8,4095,0.08269119858741761
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,1,1,128,1,float16,float16,1,0.016737599670886994
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,1,1,128,1,float16,fp8,1,0.008448000252246856
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,1,1,128,1,float16,float16,3,0.01681919991970062
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,1,1,128,1,float16,float16,8191,0.3210975885391235
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,1,1,128,1,float16,fp8,3,0.008376000076532364
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,1,1,128,1,float16,fp8,8191,0.15622559785842896
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,1,1,128,1,float16,fp8,31,0.008363199979066848
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,1,1,128,1,float16,float16,7,0.018668800592422485
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,1,1,128,1,float16,fp8,7,0.008451200276613235
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,1,1,128,1,float16,float16,15,0.01671999990940094
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,1,1,128,1,float16,fp8,15,0.008318399637937545
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,1,1,128,1,float16,fp8,255,0.00844319984316826
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,1,1,128,1,float16,float16,31,0.016655999422073364
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,1,1,128,1,float16,float16,63,0.01724800020456314
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,1,1,128,1,float16,fp8,63,0.008380799740552902
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,1,1,128,1,float16,float16,127,0.016673600673675536
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,1,1,128,1,float16,fp8,127,0.008531200140714646
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,1,1,128,1,float16,float16,255,0.01865919977426529
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,1,1,128,1,float16,float16,4095,0.020659199357032774
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,1,1,128,1,float16,float16,511,0.018569600582122803
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,1,1,128,1,float16,fp8,511,0.008428800106048583
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,1,1,128,1,float16,fp8,8191,0.012452799826860428
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,1,1,128,1,float16,float16,1023,0.01672160029411316
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,1,1,128,1,float16,fp8,1023,0.008457600325345992
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,1,1,128,1,float16,float16,2047,0.018824000656604768
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,1,1,128,1,float16,fp8,2047,0.010420800000429154
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,1,1,128,1,float16,fp8,4095,0.010507199913263321
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,1,1,128,1,float16,float16,8191,0.022833600640296936
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,1,1,128,1,float16,float16,16383,0.024719999730587007
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,1,1,128,1,float16,fp8,16383,0.014505599439144135
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,1,1,128,1,float16,float16,32767,0.044547200202941895
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,1,1,128,1,float16,fp8,32767,0.01653759926557541
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,1,1,128,1,float16,float16,65535,0.06363199949264527
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,1,1,128,1,float16,fp8,65535,0.032180801033973694
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,1,1,128,1,float16,float16,131071,0.09983199834823608
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,1,1,128,1,float16,fp8,131071,0.05111200213432312
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,1,1,128,1,float16,float16,1,0.027907198667526244
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,1,1,128,1,float16,fp8,15,0.01652639955282211
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,1,1,128,1,float16,fp8,1,0.01658879965543747
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,1,1,128,1,float16,float16,3,0.028417599201202393
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,1,1,128,1,float16,fp8,3,0.01652639955282211
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,1,1,128,1,float16,float16,7,0.028948798775672913
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,1,1,128,1,float16,fp8,7,0.016678400337696075
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,1,1,128,1,float16,float16,15,0.028811201453208923
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,1,1,128,1,float16,float16,31,0.027683201432228088
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,1,1,128,1,float16,fp8,31,0.01656160056591034
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,1,1,128,1,float16,float16,63,0.02890399992465973
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,1,1,128,1,float16,fp8,63,0.01656640022993088
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,1,1,128,1,float16,float16,127,0.02905600070953369
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,1,1,128,1,float16,fp8,127,0.01664000004529953
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,1,1,128,1,float16,float16,255,0.04416959881782532
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,1,1,128,1,float16,fp8,255,0.015596799552440643
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,1,1,128,1,float16,float16,511,0.06166399717330932
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,1,1,128,1,float16,fp8,511,0.028841599822044373
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,1,1,128,1,float16,float16,1023,0.09867039918899537
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,1,1,128,1,float16,fp8,1023,0.047884801030159
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,1,1,128,1,float16,fp8,4095,0.16035360097885132
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,1,1,128,1,float16,fp8,2047,0.08564640283584594
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,1,1,128,1,float16,float16,3,0.037171199917793274
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,1,1,128,1,float16,float16,2047,0.3528032064437866
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,1,1,128,1,float16,float16,1,0.03714079856872558
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,1,1,128,1,float16,float16,4095,0.31659998893737795
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,1,1,128,1,float16,float16,15,0.038991999626159665
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,1,1,128,1,float16,fp8,1,0.02470880001783371
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,1,1,128,1,float16,fp8,3,0.02476799935102463
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,1,1,128,1,float16,float16,7,0.0390751987695694
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,1,1,128,1,float16,fp8,7,0.024864000082015992
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,1,1,128,1,float16,fp8,15,0.0247311994433403
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,1,1,128,1,float16,float16,127,0.045204800367355344
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,1,1,128,1,float16,fp8,31,0.024864000082015992
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,1,1,128,1,float16,float16,31,0.03710559904575348
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,1,1,128,1,float16,float16,63,0.038299199938774106
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,1,1,128,1,float16,fp8,63,0.024835200607776643
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,1,1,128,1,float16,fp8,127,0.02486719936132431
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,1,1,128,1,float16,float16,255,0.061799997091293336
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,1,1,128,1,float16,fp8,255,0.0308896005153656
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,1,1,128,1,float16,float16,511,0.09877279996871949
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,1,1,128,1,float16,fp8,511,0.04877119958400726
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,1,1,128,1,float16,float16,1023,0.17214879989624024
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,1,1,128,1,float16,float16,3,0.016575999557971954
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,1,1,128,1,float16,fp8,1023,0.08579679727554321
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,1,1,128,1,float16,float16,1,0.018588800728321076
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,1,1,128,1,float16,fp8,1,0.008566399663686752
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,1,1,128,1,float16,float16,2047,0.31598401069641113
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,1,1,128,1,float16,fp8,2047,0.15729600191116333
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,1,1,128,1,float16,fp8,3,0.008446399867534638
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,1,1,128,1,float16,float16,7,0.01863519996404648
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,1,1,128,1,float16,fp8,7,0.008601599931716919
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,1,1,128,1,float16,float16,15,0.018559999763965607
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,1,1,128,1,float16,fp8,15,0.008686400204896926
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,1,1,128,1,float16,float16,31,0.016836799681186676
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,1,1,128,1,float16,fp8,31,0.008550400286912918
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,1,1,128,1,float16,float16,63,0.018441599607467652
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,1,1,128,1,float16,float16,511,0.018568000197410582
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,1,1,128,1,float16,fp8,63,0.008603200316429138
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,1,1,128,1,float16,fp8,511,0.008352000266313553
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,1,1,128,1,float16,float16,127,0.017900800704956053
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,1,1,128,1,float16,fp8,127,0.00835999995470047
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,1,1,128,1,float16,float16,255,0.016760000586509706
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,1,1,128,1,float16,fp8,255,0.008430399745702744
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,1,1,128,1,float16,float16,1023,0.018588800728321076
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,1,1,128,1,float16,fp8,8191,0.01250240057706833
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,1,1,128,1,float16,fp8,1023,0.008852799981832504
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,1,1,128,1,float16,float16,2047,0.01871519982814789
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,1,1,128,1,float16,fp8,2047,0.010520000010728836
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,1,1,128,1,float16,float16,4095,0.02080159932374954
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,1,1,128,1,float16,fp8,4095,0.011163199692964554
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,1,1,128,1,float16,float16,8191,0.0227743998169899
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,1,1,128,1,float16,float16,16383,0.043996798992156985
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,1,1,128,1,float16,fp8,16383,0.014934399724006652
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,1,1,128,1,float16,float16,32767,0.06168320178985596
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,1,1,128,1,float16,fp8,32767,0.030931198596954347
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,1,1,128,1,float16,float16,65535,0.09810240268707275
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,1,1,128,1,float16,fp8,65535,0.04910880029201507
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,1,1,128,1,float16,float16,131071,0.17019200325012207
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,1,1,128,1,float16,fp8,131071,0.08641759753227234
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,128,8,128,1,float16,fp8,1,0.01241919994354248
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,128,8,128,1,float16,float16,3,0.02276480048894882
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,128,8,128,1,float16,fp8,15,0.012484800070524216
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,128,8,128,1,float16,float16,1,0.02294880002737045
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,128,8,128,1,float16,float16,31,0.022793599963188173
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,128,8,128,1,float16,fp8,31,0.01255359947681427
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,128,8,128,1,float16,fp8,3,0.012556800246238708
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,128,8,128,1,float16,fp8,63,0.01255040019750595
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,128,8,128,1,float16,fp8,7,0.012624000012874604
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,128,8,128,1,float16,float16,15,0.022809599339962006
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,128,8,128,1,float16,float16,7,0.022888000309467315
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,128,8,128,1,float16,float16,63,0.02277279943227768
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,128,8,128,1,float16,float16,127,0.022793599963188173
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,128,8,128,1,float16,fp8,127,0.012545600533485413
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,128,8,128,1,float16,float16,255,0.023244799673557283
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,128,8,128,1,float16,fp8,255,0.012601600587368011
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,128,8,128,1,float16,float16,511,0.043224000930786134
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,128,8,128,1,float16,fp8,511,0.016575999557971954
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,128,8,128,1,float16,float16,1023,0.061643201112747195
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,128,8,128,1,float16,fp8,1023,0.03086400032043457
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,128,8,128,1,float16,float16,2047,0.09868000149726867
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,128,8,128,1,float16,fp8,2047,0.05030559897422791
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,128,8,128,1,float16,float16,1,0.01868479996919632
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,128,8,128,1,float16,float16,3,0.019942399859428406
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,128,8,128,1,float16,fp8,1,0.010396800190210342
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,128,8,128,1,float16,float16,15,0.018691200017929076
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,128,8,128,1,float16,float16,31,0.018603199720382692
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,128,8,128,1,float16,fp8,3,0.010417599976062775
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,128,8,128,1,float16,float16,7,0.018612800538539885
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,128,8,128,1,float16,fp8,7,0.010552000254392624
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,128,8,128,1,float16,fp8,15,0.010380800068378448
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,128,8,128,1,float16,fp8,31,0.008654399961233138
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,128,8,128,1,float16,float16,63,0.018559999763965607
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,128,8,128,1,float16,fp8,63,0.010427200049161912
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,128,8,128,1,float16,float16,127,0.01860480010509491
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,128,8,128,1,float16,fp8,127,0.010395199805498124
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,128,8,128,1,float16,float16,255,0.01866080015897751
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,128,8,128,1,float16,fp8,255,0.009614399820566177
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,128,8,128,1,float16,float16,511,0.018572799861431122
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,128,8,128,1,float16,float16,1023,0.0186831995844841
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,128,8,128,1,float16,fp8,511,0.009932799637317658
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,128,8,128,1,float16,fp8,1023,0.010409600287675857
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,128,8,128,1,float16,float16,2047,0.02074880003929138
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,128,8,128,1,float16,fp8,2047,0.010500799864530563
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,128,8,128,1,float16,float16,1,0.018622399866580965
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,128,8,128,1,float16,fp8,1,0.008980800211429597
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,128,8,128,1,float16,float16,7,0.018595199286937713
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,128,8,128,1,float16,fp8,3,0.010438399761915207
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,128,8,128,1,float16,float16,31,0.018681600689888
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,128,8,128,1,float16,float16,3,0.01874080002307892
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,128,8,128,1,float16,float16,63,0.018624000251293182
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,128,8,128,1,float16,fp8,7,0.008979199826717377
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,128,8,128,1,float16,float16,15,0.018783999979496
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,128,8,128,1,float16,fp8,15,0.008956799656152726
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,128,8,128,1,float16,fp8,31,0.010505600273609162
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,128,8,128,1,float16,fp8,63,0.01040000021457672
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,128,8,128,1,float16,float16,127,0.018622399866580965
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,128,8,128,1,float16,fp8,127,0.01024319976568222
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,128,8,128,1,float16,float16,255,0.0192208006978035
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,128,8,128,1,float16,fp8,255,0.0090768001973629
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,128,8,128,1,float16,float16,511,0.018670399487018586
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,128,8,128,1,float16,fp8,511,0.010396800190210342
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,128,8,128,1,float16,float16,1023,0.01865759938955307
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,128,8,128,1,float16,fp8,1023,0.010393600165843963
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,128,8,128,1,float16,float16,2047,0.020633600652217865
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,128,8,128,1,float16,fp8,2047,0.010576000064611435
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,128,8,128,1,float16,fp8,3,0.01021760031580925
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,128,8,128,1,float16,float16,3,0.01930239945650101
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,128,8,128,1,float16,fp8,1,0.010385599732398988
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,128,8,128,1,float16,fp8,7,0.01029760017991066
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,128,8,128,1,float16,float16,1,0.019222399592399596
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,128,8,128,1,float16,float16,7,0.018796800076961516
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,128,8,128,1,float16,float16,15,0.018742400407791137
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,128,8,128,1,float16,fp8,15,0.010289599746465683
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,128,8,128,1,float16,float16,255,0.01870879977941513
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,128,8,128,1,float16,float16,31,0.018680000305175783
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,128,8,128,1,float16,fp8,31,0.010372799634933472
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,128,8,128,1,float16,float16,63,0.01960480064153671
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,128,8,128,1,float16,fp8,63,0.0104032002389431
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,128,8,128,1,float16,float16,127,0.018665599822998046
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,128,8,128,1,float16,fp8,127,0.01050880029797554
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,128,8,128,1,float16,fp8,255,0.010313600301742554
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,128,8,128,1,float16,float16,511,0.022756800055503845
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,128,8,128,1,float16,fp8,511,0.012432000041007996
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,128,8,128,1,float16,float16,1023,0.04116320013999939
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,128,8,128,1,float16,fp8,1023,0.016390399634838106
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,128,8,128,1,float16,float16,2047,0.060468798875808714
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,128,8,128,1,float16,fp8,2047,0.029836800694465638
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,128,8,128,1,float16,float16,1,0.01860000044107437
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,128,8,128,1,float16,fp8,1,0.008582399785518646
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,128,8,128,1,float16,fp8,3,0.008892799913883209
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,128,8,128,1,float16,float16,3,0.01857759952545166
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,128,8,128,1,float16,float16,7,0.01809599995613098
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,128,8,128,1,float16,fp8,7,0.008484800159931184
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,128,8,128,1,float16,float16,15,0.01661919951438904
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,128,8,128,1,float16,fp8,15,0.009060800075531006
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,128,8,128,1,float16,float16,31,0.01812800019979477
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,128,8,128,1,float16,float16,255,0.018492799997329713
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,128,8,128,1,float16,fp8,31,0.00878560021519661
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,128,8,128,1,float16,float16,63,0.01855680048465729
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,128,8,128,1,float16,fp8,63,0.00846719965338707
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,128,8,128,1,float16,float16,127,0.016771200299263
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,128,8,128,1,float16,fp8,127,0.00843520015478134
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,128,8,128,1,float16,fp8,255,0.008852799981832504
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,128,8,128,1,float16,float16,511,0.01658560037612915
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,128,8,128,1,float16,fp8,511,0.008380799740552902
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,128,8,128,1,float16,float16,1023,0.01860959976911545
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,128,8,128,1,float16,fp8,1023,0.008463999629020691
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,128,8,128,1,float16,fp8,2047,0.010398399829864503
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,128,8,128,1,float16,float16,2047,0.019566400349140166
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,128,8,128,1,float16,float16,1,0.030268800258636475
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,128,8,128,1,float16,float16,7,0.030564799904823303
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,128,8,128,1,float16,fp8,1,0.01663679927587509
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,128,8,128,1,float16,float16,31,0.03096640110015869
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,128,8,128,1,float16,fp8,3,0.01656640022993088
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,128,8,128,1,float16,fp8,7,0.01658719927072525
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,128,8,128,1,float16,float16,3,0.030377599596977233
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,128,8,128,1,float16,float16,15,0.030073601007461547
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,128,8,128,1,float16,fp8,15,0.016809600591659545
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,128,8,128,1,float16,fp8,31,0.016760000586509706
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,128,8,128,1,float16,float16,63,0.03088639974594116
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,128,8,128,1,float16,fp8,63,0.016868799924850464
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,128,8,128,1,float16,float16,127,0.033020800352096556
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,128,8,128,1,float16,fp8,127,0.01677920073270798
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,128,8,128,1,float16,float16,255,0.04326240122318268
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,128,8,128,1,float16,fp8,255,0.01701440066099167
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,128,8,128,1,float16,float16,511,0.06197919845581055
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,128,8,128,1,float16,fp8,511,0.03070560097694397
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,128,8,128,1,float16,float16,1023,0.09889760017395019
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,128,8,128,1,float16,fp8,1023,0.050843197107315066
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,128,8,128,1,float16,fp8,3,0.02691200077533722
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,128,8,128,1,float16,float16,1,0.04318560063838959
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,128,8,128,1,float16,fp8,1,0.02701759934425354
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,128,8,128,1,float16,fp8,7,0.026902401447296144
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,128,8,128,1,float16,float16,7,0.04328320026397705
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,128,8,128,1,float16,float16,3,0.04320000112056732
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,128,8,128,1,float16,float16,15,0.04333600103855133
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,128,8,128,1,float16,fp8,15,0.026915198564529418
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,128,8,128,1,float16,float16,31,0.043263998627662656
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,128,8,128,1,float16,fp8,31,0.02687999904155731
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,128,8,128,1,float16,fp8,63,0.026966398954391478
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,128,8,128,1,float16,float16,63,0.045244801044464114
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,128,8,128,1,float16,fp8,127,0.028336000442504884
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,128,8,128,1,float16,float16,127,0.04947519898414612
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,128,8,128,1,float16,float16,255,0.06483839750289917
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,128,8,128,1,float16,fp8,255,0.03218719959259033
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,128,8,128,1,float16,float16,3,0.07003999948501587
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,128,8,128,1,float16,float16,7,0.0690559983253479
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,128,8,128,1,float16,float16,15,0.07197440266609192
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,128,8,128,1,float16,fp8,3,0.047393599152565004
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,128,8,128,1,float16,float16,1,0.06853439807891845
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,128,8,128,1,float16,fp8,1,0.04737440049648285
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,128,8,128,1,float16,fp8,15,0.04738720059394837
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,128,8,128,1,float16,float16,127,0.07638559937477112
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,128,8,128,1,float16,float16,31,0.07586399912834167
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,128,8,128,1,float16,fp8,7,0.04737280011177063
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,128,8,128,1,float16,fp8,31,0.04743359982967377
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,128,8,128,1,float16,float16,63,0.07552639842033386
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,128,8,128,1,float16,fp8,63,0.04786239862442017
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,128,8,128,1,float16,fp8,127,0.0488431990146637
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,128,8,128,1,float16,float16,1,0.018699200451374055
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,128,8,128,1,float16,fp8,1,0.008495999872684479
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,128,8,128,1,float16,float16,7,0.018692800402641298
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,128,8,128,1,float16,fp8,3,0.010387200117111205
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,128,8,128,1,float16,float16,3,0.018681600689888
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,128,8,128,1,float16,float16,63,0.018603199720382692
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,128,8,128,1,float16,fp8,7,0.00846880003809929
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,128,8,128,1,float16,float16,15,0.018692800402641298
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,128,8,128,1,float16,fp8,15,0.008662399649620057
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,128,8,128,1,float16,float16,31,0.018646399676799773
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,128,8,128,1,float16,fp8,31,0.010148800164461135
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,128,8,128,1,float16,float16,127,0.01866399943828583
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,128,8,128,1,float16,fp8,63,0.010206399857997895
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,128,8,128,1,float16,fp8,127,0.009120000153779983
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,128,8,128,1,float16,float16,255,0.01865919977426529
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,128,8,128,1,float16,fp8,255,0.010391999781131745
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,128,8,128,1,float16,fp8,2047,0.0125231996178627
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,128,8,128,1,float16,float16,511,0.018745599687099455
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,128,8,128,1,float16,fp8,511,0.009116800129413604
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,128,8,128,1,float16,float16,1023,0.01929599940776825
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,128,8,128,1,float16,fp8,1023,0.010407999902963639
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,128,8,128,1,float16,float16,2047,0.02239519953727722
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,128,8,128,1,float16,fp8,1,0.08675680160522461
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,128,8,128,1,float16,fp8,3,0.08722559809684753
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,128,8,128,1,float16,float16,3,0.1303920030593872
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,128,8,128,1,float16,fp8,7,0.08761759996414184
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,128,8,128,1,float16,float16,7,0.1313264012336731
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,128,8,128,1,float16,float16,15,0.13000479936599732
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,128,8,128,1,float16,float16,1,0.1256495952606201
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,128,8,128,1,float16,fp8,15,0.08813760280609131
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,128,8,128,1,float16,float16,31,0.12995200157165526
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,128,8,128,1,float16,fp8,31,0.08833119869232178
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,128,8,128,1,float16,float16,63,0.12931840419769286
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,128,8,128,1,float16,fp8,63,0.08844799995422363
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,128,8,128,1,float16,fp8,1,0.16715680360794066
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,128,8,128,1,float16,float16,1,0.2361520051956177
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,128,8,128,1,float16,fp8,3,0.1668287992477417
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,128,8,128,1,float16,fp8,7,0.16709760427474976
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,128,8,128,1,float16,float16,7,0.23486239910125734
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,128,8,128,1,float16,float16,3,0.23466720581054687
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,128,8,128,1,float16,fp8,15,0.1672111988067627
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,128,8,128,1,float16,float16,15,0.2392591953277588
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,128,8,128,1,float16,float16,31,0.2359760046005249
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,128,8,128,1,float16,fp8,31,0.16643040180206298
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,128,8,128,1,float16,fp8,3,0.008428800106048583
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,128,8,128,1,float16,float16,7,0.016740800440311433
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,128,8,128,1,float16,float16,3,0.01713919937610626
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,128,8,128,1,float16,fp8,1,0.008414400368928909
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,128,8,128,1,float16,fp8,31,0.008449599891901017
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,128,8,128,1,float16,float16,1,0.018585599958896637
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,128,8,128,1,float16,fp8,63,0.008367999643087386
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,128,8,128,1,float16,fp8,7,0.008315200358629227
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,128,8,128,1,float16,float16,15,0.016641600430011748
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,128,8,128,1,float16,fp8,15,0.008387199789285659
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,128,8,128,1,float16,float16,31,0.01857440024614334
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,128,8,128,1,float16,float16,63,0.018534399569034576
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,128,8,128,1,float16,float16,127,0.018603199720382692
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,128,8,128,1,float16,fp8,127,0.00843999981880188
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,128,8,128,1,float16,float16,255,0.01672320067882538
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,128,8,128,1,float16,float16,511,0.018713599443435668
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,128,8,128,1,float16,fp8,255,0.008433599770069123
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,128,8,128,1,float16,fp8,511,0.009096000343561172
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,128,8,128,1,float16,float16,1023,0.020747199654579163
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,128,8,128,1,float16,fp8,1023,0.010915199667215348
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,128,8,128,1,float16,fp8,2047,0.01451680064201355
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,128,8,128,1,float16,float16,2047,0.041294398903846743
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,96,8,128,1,float16,float16,1,0.02279040068387985
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,96,8,128,1,float16,fp8,1,0.012464000284671784
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,96,8,128,1,float16,fp8,7,0.01257600039243698
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,96,8,128,1,float16,float16,15,0.022891199588775633
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,96,8,128,1,float16,fp8,15,0.012535999715328216
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,96,8,128,1,float16,float16,7,0.022710399329662324
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,96,8,128,1,float16,fp8,3,0.012559999525547028
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,96,8,128,1,float16,float16,3,0.022899200022220612
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,96,8,128,1,float16,float16,31,0.022801600396633148
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,96,8,128,1,float16,float16,63,0.022767999768257143
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,96,8,128,1,float16,fp8,31,0.012577599287033081
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,96,8,128,1,float16,fp8,63,0.012467200309038163
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,96,8,128,1,float16,float16,127,0.022708800435066224
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,96,8,128,1,float16,fp8,127,0.01252799928188324
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,96,8,128,1,float16,float16,255,0.022886399924755097
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,96,8,128,1,float16,fp8,255,0.012601600587368011
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,96,8,128,1,float16,float16,511,0.04273279905319214
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,96,8,128,1,float16,fp8,511,0.016655999422073364
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,96,8,128,1,float16,float16,1023,0.061780798435211184
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,96,8,128,1,float16,fp8,1023,0.03078399896621704
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,96,8,128,1,float16,float16,2047,0.09898880124092102
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,96,8,128,1,float16,fp8,2047,0.05072640180587769
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,96,8,128,1,float16,float16,1,0.018595199286937713
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,96,8,128,1,float16,fp8,1,0.010000000149011612
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,96,8,128,1,float16,float16,7,0.01881600022315979
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,96,8,128,1,float16,fp8,7,0.01043040007352829
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,96,8,128,1,float16,float16,31,0.01796800047159195
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,96,8,128,1,float16,float16,3,0.018580800294876097
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,96,8,128,1,float16,fp8,3,0.010398399829864503
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,96,8,128,1,float16,float16,15,0.018726399540901183
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,96,8,128,1,float16,fp8,15,0.008948799967765809
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,96,8,128,1,float16,float16,63,0.01850239932537079
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,96,8,128,1,float16,fp8,31,0.010435199737548828
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,96,8,128,1,float16,fp8,63,0.010384000092744827
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,96,8,128,1,float16,float16,127,0.018681600689888
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,96,8,128,1,float16,fp8,127,0.009511999785900116
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,96,8,128,1,float16,float16,255,0.018670399487018586
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,96,8,128,1,float16,fp8,255,0.009457600116729737
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,96,8,128,1,float16,float16,511,0.01873279958963394
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,96,8,128,1,float16,fp8,511,0.010454399883747101
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,96,8,128,1,float16,float16,1023,0.019411200284957887
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,96,8,128,1,float16,fp8,1023,0.010396800190210342
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,96,8,128,1,float16,float16,2047,0.02083200067281723
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,96,8,128,1,float16,fp8,2047,0.010502400249242783
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,96,8,128,1,float16,float16,1,0.019054399430751802
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,96,8,128,1,float16,float16,7,0.018774400651454925
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,96,8,128,1,float16,fp8,7,0.0104592002928257
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,96,8,128,1,float16,float16,3,0.018699200451374055
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,96,8,128,1,float16,float16,15,0.0186831995844841
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,96,8,128,1,float16,fp8,3,0.010395199805498124
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,96,8,128,1,float16,fp8,1,0.010441599786281586
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,96,8,128,1,float16,fp8,15,0.010396800190210342
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,96,8,128,1,float16,float16,31,0.01871040016412735
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,96,8,128,1,float16,fp8,31,0.010380800068378448
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,96,8,128,1,float16,float16,63,0.018775999546051025
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,96,8,128,1,float16,fp8,63,0.010502400249242783
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,96,8,128,1,float16,float16,127,0.01868640035390854
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,96,8,128,1,float16,fp8,127,0.01037440001964569
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,96,8,128,1,float16,float16,255,0.018739199638366698
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,96,8,128,1,float16,fp8,255,0.010411199927330018
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,96,8,128,1,float16,float16,511,0.01919199973344803
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,96,8,128,1,float16,fp8,511,0.010372799634933472
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,96,8,128,1,float16,float16,1023,0.018799999356269838
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,96,8,128,1,float16,fp8,1023,0.010393600165843963
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,96,8,128,1,float16,float16,2047,0.020755200088024138
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,96,8,128,1,float16,fp8,2047,0.010491199791431427
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,96,8,128,1,float16,float16,1,0.019096000492572783
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,96,8,128,1,float16,fp8,1,0.00965920016169548
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,96,8,128,1,float16,float16,3,0.01868640035390854
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,96,8,128,1,float16,float16,7,0.018692800402641298
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,96,8,128,1,float16,fp8,3,0.010401599854230881
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,96,8,128,1,float16,fp8,7,0.01019200012087822
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,96,8,128,1,float16,float16,15,0.019105599820613862
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,96,8,128,1,float16,fp8,15,0.010412800312042236
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,96,8,128,1,float16,float16,31,0.018651199340820313
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,96,8,128,1,float16,fp8,31,0.010367999970912933
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,96,8,128,1,float16,float16,63,0.01871200054883957
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,96,8,128,1,float16,fp8,63,0.010406400263309478
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,96,8,128,1,float16,float16,127,0.01868959963321686
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,96,8,128,1,float16,fp8,127,0.010471999645233154
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,96,8,128,1,float16,float16,255,0.01907680034637451
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,96,8,128,1,float16,fp8,255,0.010417599976062775
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,96,8,128,1,float16,float16,511,0.02239519953727722
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,96,8,128,1,float16,fp8,511,0.012428800016641617
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,96,8,128,1,float16,float16,1023,0.0414112001657486
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,96,8,128,1,float16,fp8,1023,0.016433599591255187
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,96,8,128,1,float16,float16,2047,0.06116960048675537
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,96,8,128,1,float16,fp8,2047,0.0296207994222641
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,96,8,128,1,float16,float16,3,0.01870400011539459
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,96,8,128,1,float16,float16,7,0.016595199704170227
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,96,8,128,1,float16,float16,15,0.016761599481105803
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,96,8,128,1,float16,fp8,15,0.008544000238180161
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,96,8,128,1,float16,fp8,3,0.00891520008444786
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,96,8,128,1,float16,fp8,1,0.008892799913883209
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,96,8,128,1,float16,float16,1,0.01746080070734024
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,96,8,128,1,float16,float16,31,0.016684800386428833
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,96,8,128,1,float16,fp8,7,0.008955200016498566
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,96,8,128,1,float16,fp8,31,0.008654399961233138
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,96,8,128,1,float16,float16,63,0.018729600310325622
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,96,8,128,1,float16,fp8,63,0.008563199639320373
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,96,8,128,1,float16,float16,127,0.018603199720382692
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,96,8,128,1,float16,fp8,127,0.008644799888134002
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,96,8,128,1,float16,float16,255,0.01664479970932007
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,96,8,128,1,float16,fp8,255,0.008881600201129913
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,96,8,128,1,float16,float16,511,0.016646400094032288
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,96,8,128,1,float16,fp8,511,0.008377599716186523
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,96,8,128,1,float16,float16,1023,0.018641600012779237
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,96,8,128,1,float16,fp8,1023,0.00843999981880188
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,96,8,128,1,float16,float16,2047,0.01879200041294098
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,96,8,128,1,float16,fp8,2047,0.010424000024795533
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,96,8,128,1,float16,fp8,1,0.01659200042486191
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,96,8,128,1,float16,fp8,3,0.016564799845218657
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,96,8,128,1,float16,float16,15,0.030403199791908263
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,96,8,128,1,float16,fp8,7,0.016612799465656282
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,96,8,128,1,float16,float16,3,0.030086401104927062
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,96,8,128,1,float16,fp8,15,0.016638399660587312
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,96,8,128,1,float16,float16,1,0.02892639935016632
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,96,8,128,1,float16,float16,7,0.030051198601722718
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,96,8,128,1,float16,float16,31,0.030811199545860292
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,96,8,128,1,float16,fp8,31,0.016625599563121797
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,96,8,128,1,float16,float16,63,0.030287998914718627
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,96,8,128,1,float16,fp8,63,0.016606399416923524
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,96,8,128,1,float16,float16,511,0.061692798137664796
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,96,8,128,1,float16,float16,127,0.030969598889350893
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,96,8,128,1,float16,fp8,127,0.016760000586509706
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,96,8,128,1,float16,fp8,1023,0.05119360089302063
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,96,8,128,1,float16,float16,255,0.04267840087413788
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,96,8,128,1,float16,fp8,255,0.01716320067644119
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,96,8,128,1,float16,fp8,511,0.030870398879051207
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,96,8,128,1,float16,float16,1023,0.09862080216407776
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,96,8,128,1,float16,float16,1,0.04328480064868927
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,96,8,128,1,float16,fp8,1,0.027063998579978942
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,96,8,128,1,float16,fp8,3,0.026902401447296144
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,96,8,128,1,float16,fp8,7,0.026939201354980468
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,96,8,128,1,float16,float16,3,0.04325439929962158
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,96,8,128,1,float16,float16,7,0.04318079948425293
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,96,8,128,1,float16,float16,15,0.04327360093593598
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,96,8,128,1,float16,fp8,15,0.02696320116519928
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,96,8,128,1,float16,float16,31,0.04324800074100495
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,96,8,128,1,float16,fp8,31,0.026972800493240356
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,96,8,128,1,float16,float16,63,0.045131200551986696
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,96,8,128,1,float16,fp8,63,0.026907199621200563
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,96,8,128,1,float16,float16,127,0.04925439953804016
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,96,8,128,1,float16,fp8,127,0.0277072012424469
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,96,8,128,1,float16,float16,255,0.06381760239601135
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,96,8,128,1,float16,fp8,255,0.031948798894882204
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,96,8,128,1,float16,fp8,1,0.04742560088634491
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,96,8,128,1,float16,float16,3,0.06907520294189454
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,96,8,128,1,float16,float16,7,0.0684943974018097
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,96,8,128,1,float16,float16,15,0.07292320132255554
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,96,8,128,1,float16,float16,1,0.06836959719657898
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,96,8,128,1,float16,fp8,3,0.04736160039901734
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,96,8,128,1,float16,fp8,15,0.04744960069656372
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,96,8,128,1,float16,fp8,7,0.04748800098896026
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,96,8,128,1,float16,float16,31,0.07446560263633728
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,96,8,128,1,float16,fp8,31,0.04734239876270294
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,96,8,128,1,float16,float16,63,0.07578880190849305
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,96,8,128,1,float16,fp8,63,0.04759039878845215
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,96,8,128,1,float16,float16,127,0.07571039795875549
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,96,8,128,1,float16,fp8,127,0.049004799127578734
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,96,8,128,1,float16,float16,3,0.018705600500106813
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,96,8,128,1,float16,float16,1,0.018668800592422485
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,96,8,128,1,float16,fp8,1,0.01014079973101616
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,96,8,128,1,float16,float16,15,0.018662400543689728
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,96,8,128,1,float16,float16,7,0.018606400489807128
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,96,8,128,1,float16,fp8,15,0.0087567999958992
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,96,8,128,1,float16,fp8,3,0.01005759984254837
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,96,8,128,1,float16,fp8,7,0.010307200253009796
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,96,8,128,1,float16,float16,31,0.018587200343608855
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,96,8,128,1,float16,fp8,31,0.008539199829101562
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,96,8,128,1,float16,float16,63,0.018760000169277192
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,96,8,128,1,float16,fp8,63,0.008859200030565261
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,96,8,128,1,float16,float16,127,0.01858399957418442
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,96,8,128,1,float16,fp8,127,0.010107199847698211
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,96,8,128,1,float16,float16,255,0.018756799399852753
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,96,8,128,1,float16,fp8,255,0.010267200320959092
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,96,8,128,1,float16,float16,511,0.018564799427986146
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,96,8,128,1,float16,fp8,511,0.00843520015478134
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,96,8,128,1,float16,float16,1023,0.018688000738620758
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,96,8,128,1,float16,fp8,1023,0.010393600165843963
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,96,8,128,1,float16,float16,2047,0.022705599665641785
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,96,8,128,1,float16,fp8,2047,0.012441600114107132
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,96,8,128,1,float16,fp8,1,0.08668320178985596
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,96,8,128,1,float16,float16,3,0.12475520372390747
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,96,8,128,1,float16,fp8,3,0.08648639917373657
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,96,8,128,1,float16,fp8,7,0.08670079708099365
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,96,8,128,1,float16,float16,7,0.1292415976524353
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,96,8,128,1,float16,float16,1,0.1228767991065979
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,96,8,128,1,float16,float16,15,0.1302832007408142
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,96,8,128,1,float16,fp8,15,0.08693439960479736
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,96,8,128,1,float16,float16,31,0.12941759824752808
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,96,8,128,1,float16,fp8,31,0.08831200003623962
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,96,8,128,1,float16,float16,63,0.12934080362319947
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,96,8,128,1,float16,fp8,63,0.08823840022087097
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,96,8,128,1,float16,float16,1,0.23962080478668213
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,96,8,128,1,float16,float16,3,0.23458559513092042
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,96,8,128,1,float16,fp8,3,0.16648000478744507
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,96,8,128,1,float16,fp8,7,0.16647839546203613
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,96,8,128,1,float16,fp8,1,0.1662511944770813
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,96,8,128,1,float16,float16,15,0.2392656087875366
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,96,8,128,1,float16,float16,7,0.23425600528717042
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,96,8,128,1,float16,fp8,15,0.1662287950515747
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,96,8,128,1,float16,float16,31,0.233787202835083
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,96,8,128,1,float16,fp8,31,0.16654560565948487
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,96,8,128,1,float16,float16,1,0.01675039976835251
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,96,8,128,1,float16,float16,3,0.016646400094032288
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,96,8,128,1,float16,fp8,7,0.008374399691820144
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,96,8,128,1,float16,float16,7,0.016551999747753142
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,96,8,128,1,float16,fp8,3,0.008432000130414962
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,96,8,128,1,float16,fp8,1,0.008379200100898742
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,96,8,128,1,float16,fp8,15,0.008363199979066848
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,96,8,128,1,float16,float16,15,0.017164799571037292
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,96,8,128,1,float16,float16,127,0.016672000288963318
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,96,8,128,1,float16,float16,31,0.018590399622917177
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,96,8,128,1,float16,fp8,31,0.008379200100898742
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,96,8,128,1,float16,float16,63,0.01868640035390854
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,96,8,128,1,float16,fp8,63,0.008472000062465668
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,96,8,128,1,float16,fp8,127,0.00843520015478134
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,96,8,128,1,float16,float16,255,0.01663679927587509
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,96,8,128,1,float16,fp8,255,0.008460800349712371
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,96,8,128,1,float16,fp8,2047,0.014614400267601014
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,96,8,128,1,float16,float16,511,0.018624000251293182
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,96,8,128,1,float16,fp8,511,0.008382400125265121
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,96,8,128,1,float16,float16,1023,0.02144639939069748
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,96,8,128,1,float16,fp8,1023,0.010420800000429154
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,96,8,128,1,float16,float16,2047,0.041222399473190306
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,4,128,1,float16,float16,1,0.01891999989748001
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,4,128,1,float16,fp8,1,0.010414399951696397
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,4,128,1,float16,float16,3,0.0189983993768692
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,4,128,1,float16,fp8,3,0.01037440001964569
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,4,128,1,float16,fp8,7,0.01048159971833229
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,4,128,1,float16,float16,7,0.01887200027704239
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,4,128,1,float16,float16,15,0.019020800292491914
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,4,128,1,float16,fp8,15,0.009854400157928466
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,4,128,1,float16,float16,31,0.01921440064907074
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,4,128,1,float16,fp8,31,0.010422399640083313
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,4,128,1,float16,float16,63,0.01878879964351654
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,4,128,1,float16,fp8,63,0.010478399693965912
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,4,128,1,float16,float16,127,0.018694399297237395
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,4,128,1,float16,fp8,127,0.010385599732398988
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,4,128,1,float16,float16,255,0.019233599305152893
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,4,128,1,float16,fp8,255,0.010401599854230881
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,4,128,1,float16,float16,511,0.022672000527381896
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,4,128,1,float16,fp8,511,0.012438400089740754
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,4,128,1,float16,float16,1023,0.04154880046844482
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,4,128,1,float16,fp8,1023,0.014710399508476257
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,4,128,1,float16,float16,2047,0.06132320165634155
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,4,128,1,float16,fp8,2047,0.029518398642539977
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,4,128,1,float16,float16,4095,0.09665600061416627
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,8,128,1,float16,float16,1,0.020751999318599702
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,4,128,1,float16,fp8,4095,0.04912639856338501
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,8,128,1,float16,fp8,1,0.010911999642848969
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,8,128,1,float16,float16,3,0.020923200249671935
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,8,128,1,float16,fp8,3,0.010627199709415436
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,8,128,1,float16,float16,7,0.020694400370121
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,8,128,1,float16,fp8,7,0.010476800054311753
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,8,128,1,float16,float16,15,0.021065600216388702
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,8,128,1,float16,fp8,15,0.010539200156927109
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,8,128,1,float16,float16,31,0.020777599513530733
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,8,128,1,float16,fp8,31,0.010622400045394897
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,8,128,1,float16,float16,63,0.02085919976234436
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,8,128,1,float16,fp8,63,0.010487999767065048
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,8,128,1,float16,float16,127,0.021403199434280394
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,8,128,1,float16,fp8,127,0.010708799958229065
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,8,128,1,float16,float16,255,0.021643200516700746
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,8,128,1,float16,fp8,255,0.010713600367307664
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,8,128,1,float16,float16,511,0.04111199975013733
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,8,128,1,float16,fp8,511,0.014476799964904785
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,8,128,1,float16,float16,1023,0.06109920144081116
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,8,128,1,float16,fp8,1023,0.029124799370765685
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,8,128,1,float16,float16,2047,0.0972432017326355
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,8,128,1,float16,fp8,2047,0.04804159998893738
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,8,128,1,float16,float16,4095,0.17367680072784425
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,64,8,128,1,float16,fp8,4095,0.08831520080566406
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,4,128,1,float16,float16,1,0.018662400543689728
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,4,128,1,float16,fp8,3,0.010288000106811523
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,4,128,1,float16,fp8,1,0.010441599786281586
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,4,128,1,float16,fp8,7,0.009382399916648864
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,4,128,1,float16,float16,7,0.018713599443435668
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,4,128,1,float16,float16,3,0.018723200261592864
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,4,128,1,float16,float16,15,0.017635199427604675
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,4,128,1,float16,fp8,15,0.010478399693965912
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,4,128,1,float16,fp8,127,0.008724799752235413
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,4,128,1,float16,float16,31,0.018615999817848207
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,4,128,1,float16,fp8,255,0.009561599791049957
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,4,128,1,float16,fp8,31,0.009888000041246413
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,4,128,1,float16,float16,63,0.018632000684738158
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,4,128,1,float16,fp8,63,0.010284800082445145
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,4,128,1,float16,float16,127,0.018680000305175783
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,4,128,1,float16,float16,255,0.018772800266742707
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,4,128,1,float16,float16,511,0.018911999464035035
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,4,128,1,float16,fp8,511,0.010486400127410889
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,4,128,1,float16,float16,1023,0.01871519982814789
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,4,128,1,float16,fp8,1023,0.010414399951696397
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,4,128,1,float16,float16,2047,0.01908639967441559
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,4,128,1,float16,fp8,2047,0.010492800176143647
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,4,128,1,float16,float16,4095,0.020734399557113647
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,4,128,1,float16,fp8,4095,0.012465599924325943
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,8,128,1,float16,float16,1,0.01663520038127899
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,8,128,1,float16,fp8,1,0.00836160033941269
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,8,128,1,float16,fp8,15,0.00843520015478134
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,8,128,1,float16,float16,3,0.01663679927587509
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,8,128,1,float16,float16,31,0.017022399604320525
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,8,128,1,float16,fp8,3,0.008558399975299835
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,8,128,1,float16,float16,7,0.01857759952545166
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,8,128,1,float16,fp8,7,0.008508799970149994
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,8,128,1,float16,float16,15,0.018700799345970152
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,8,128,1,float16,float16,255,0.01868959963321686
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,8,128,1,float16,fp8,31,0.008459199965000153
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,8,128,1,float16,float16,63,0.01674239933490753
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,8,128,1,float16,float16,127,0.017667199671268462
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,8,128,1,float16,fp8,127,0.008366400003433227
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,8,128,1,float16,fp8,63,0.008535999804735184
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,8,128,1,float16,float16,511,0.01876160055398941
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,8,128,1,float16,fp8,255,0.008475200086832047
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,8,128,1,float16,fp8,511,0.008470399677753449
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,8,128,1,float16,float16,1023,0.016564799845218657
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,8,128,1,float16,fp8,1023,0.00846560001373291
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,8,128,1,float16,float16,2047,0.018639999628067016
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,8,128,1,float16,fp8,2047,0.010424000024795533
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,8,128,1,float16,float16,4095,0.02064799964427948
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,64,8,128,1,float16,fp8,4095,0.010486400127410889
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,4,128,1,float16,fp8,1,0.00870240032672882
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,4,128,1,float16,float16,1,0.018798400461673737
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,4,128,1,float16,float16,7,0.019531199336051942
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,4,128,1,float16,fp8,3,0.010428799688816071
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,4,128,1,float16,fp8,7,0.010636799782514573
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,4,128,1,float16,float16,3,0.017609600722789765
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,4,128,1,float16,float16,15,0.01886879950761795
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,4,128,1,float16,fp8,15,0.010460799932479859
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,4,128,1,float16,float16,31,0.018713599443435668
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,4,128,1,float16,fp8,31,0.010409600287675857
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,4,128,1,float16,float16,63,0.01870400011539459
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,4,128,1,float16,fp8,63,0.01027199998497963
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,4,128,1,float16,float16,127,0.018612800538539885
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,4,128,1,float16,fp8,127,0.010407999902963639
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,4,128,1,float16,float16,255,0.01867839992046356
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,4,128,1,float16,fp8,255,0.010417599976062775
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,4,128,1,float16,float16,511,0.01871200054883957
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,4,128,1,float16,fp8,511,0.010422399640083313
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,4,128,1,float16,float16,1023,0.01982239931821823
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,4,128,1,float16,fp8,1023,0.010398399829864503
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,4,128,1,float16,float16,2047,0.020656000077724456
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,4,128,1,float16,fp8,2047,0.010465600341558457
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,4,128,1,float16,float16,4095,0.020683200657367708
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,4,128,1,float16,fp8,4095,0.012545600533485413
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,8,128,1,float16,fp8,7,0.008441600203514098
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,8,128,1,float16,float16,1,0.01666560024023056
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,8,128,1,float16,fp8,1,0.008476799726486206
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,8,128,1,float16,float16,3,0.01671359986066818
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,8,128,1,float16,fp8,3,0.008422400057315826
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,8,128,1,float16,float16,7,0.01703680008649826
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,8,128,1,float16,float16,15,0.01858240067958832
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,8,128,1,float16,fp8,15,0.008428800106048583
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,8,128,1,float16,fp8,127,0.008510400354862214
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,8,128,1,float16,float16,31,0.01857600063085556
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,8,128,1,float16,float16,511,0.018700799345970152
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,8,128,1,float16,fp8,31,0.008404800295829773
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,8,128,1,float16,float16,63,0.016755199432373045
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,8,128,1,float16,fp8,63,0.008372800052165985
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,8,128,1,float16,float16,127,0.016599999368190767
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,8,128,1,float16,float16,255,0.017555199563503265
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,8,128,1,float16,fp8,255,0.008548799902200699
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,8,128,1,float16,float16,1023,0.018628799915313722
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,8,128,1,float16,fp8,511,0.008478400111198426
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,8,128,1,float16,fp8,1023,0.008568000048398972
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,8,128,1,float16,float16,2047,0.018668800592422485
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,8,128,1,float16,fp8,2047,0.010407999902963639
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,8,128,1,float16,float16,4095,0.02072799950838089
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,64,8,128,1,float16,fp8,4095,0.010595200210809707
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,4,128,1,float16,float16,1,0.018593600392341612
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,4,128,1,float16,fp8,3,0.008427199721336365
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,4,128,1,float16,fp8,1,0.0083856001496315
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,4,128,1,float16,fp8,7,0.008470399677753449
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,4,128,1,float16,float16,15,0.017211200296878816
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,4,128,1,float16,fp8,15,0.008433599770069123
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,4,128,1,float16,float16,31,0.018571199476718904
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,4,128,1,float16,float16,3,0.017315199971199034
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,4,128,1,float16,float16,7,0.017803199589252472
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,4,128,1,float16,fp8,31,0.00833119973540306
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,4,128,1,float16,float16,63,0.01858240067958832
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,4,128,1,float16,fp8,63,0.008417599648237229
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,4,128,1,float16,float16,127,0.017422400414943695
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,4,128,1,float16,fp8,127,0.00835679993033409
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,4,128,1,float16,float16,255,0.016737599670886994
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,4,128,1,float16,fp8,255,0.008395200222730636
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,4,128,1,float16,float16,511,0.01871200054883957
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,4,128,1,float16,fp8,511,0.008432000130414962
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,4,128,1,float16,float16,1023,0.020843200385570526
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,4,128,1,float16,fp8,1023,0.010611200332641601
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,4,128,1,float16,float16,2047,0.041094401478767396
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,4,128,1,float16,fp8,2047,0.014635199308395385
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,4,128,1,float16,float16,4095,0.0597536027431488
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,4,128,1,float16,fp8,4095,0.02911359965801239
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,8,128,1,float16,float16,1,0.018588800728321076
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,8,128,1,float16,fp8,1,0.008416000008583068
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,8,128,1,float16,float16,3,0.01873439997434616
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,8,128,1,float16,fp8,3,0.00846719965338707
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,8,128,1,float16,float16,31,0.018676799535751343
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,8,128,1,float16,float16,7,0.018588800728321076
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,8,128,1,float16,fp8,7,0.008823999762535095
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,8,128,1,float16,float16,15,0.018595199286937713
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,8,128,1,float16,fp8,15,0.00846880003809929
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,8,128,1,float16,fp8,31,0.008393599838018417
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,8,128,1,float16,float16,63,0.01873600035905838
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,8,128,1,float16,fp8,63,0.00873119980096817
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,8,128,1,float16,fp8,511,0.010407999902963639
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,8,128,1,float16,float16,127,0.018614399433135986
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,8,128,1,float16,fp8,127,0.008560000360012055
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,8,128,1,float16,float16,255,0.018700799345970152
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,8,128,1,float16,fp8,1023,0.014377599954605103
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,8,128,1,float16,fp8,255,0.008479999750852585
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,8,128,1,float16,float16,511,0.020713600516319274
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,8,128,1,float16,float16,1023,0.04122079908847809
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,8,128,1,float16,fp8,2047,0.02895520031452179
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,8,128,1,float16,float16,2047,0.05974559783935547
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,8,128,1,float16,float16,4095,0.09629439711570739
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,64,8,128,1,float16,fp8,4095,0.04801760017871857
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,4,128,1,float16,fp8,1,0.009748800098896027
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,4,128,1,float16,float16,1,0.018606400489807128
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,4,128,1,float16,float16,7,0.018667200207710268
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,4,128,1,float16,float16,3,0.01810240000486374
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,4,128,1,float16,float16,31,0.018628799915313722
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,4,128,1,float16,fp8,15,0.010531199723482132
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,4,128,1,float16,float16,63,0.018798400461673737
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,4,128,1,float16,float16,15,0.018617600202560425
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,4,128,1,float16,float16,127,0.018691200017929076
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,4,128,1,float16,fp8,3,0.010420800000429154
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,4,128,1,float16,fp8,7,0.010465600341558457
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,4,128,1,float16,fp8,31,0.01040479987859726
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,4,128,1,float16,fp8,63,0.010515200346708298
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,4,128,1,float16,fp8,127,0.010427200049161912
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,4,128,1,float16,float16,255,0.01865600049495697
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,4,128,1,float16,fp8,255,0.01040000021457672
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,4,128,1,float16,float16,511,0.018668800592422485
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,4,128,1,float16,fp8,511,0.010409600287675857
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,4,128,1,float16,float16,1023,0.018721599876880646
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,4,128,1,float16,fp8,1023,0.010577599704265594
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,4,128,1,float16,float16,2047,0.020664000511169435
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,4,128,1,float16,fp8,2047,0.010494399815797806
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,4,128,1,float16,float16,4095,0.02269120067358017
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,4,128,1,float16,fp8,4095,0.012582400441169738
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,8,128,1,float16,float16,1,0.016728000342845918
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,8,128,1,float16,fp8,1,0.008353599905967712
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,8,128,1,float16,float16,3,0.016631999611854555
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,8,128,1,float16,fp8,3,0.008436799794435502
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,8,128,1,float16,float16,7,0.01653279960155487
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,8,128,1,float16,float16,63,0.016603200137615202
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,8,128,1,float16,fp8,7,0.008406399935483932
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,8,128,1,float16,float16,15,0.016575999557971954
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,8,128,1,float16,fp8,15,0.008369600027799606
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,8,128,1,float16,float16,31,0.01666879951953888
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,8,128,1,float16,fp8,31,0.00849440023303032
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,8,128,1,float16,fp8,63,0.00835679993033409
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,8,128,1,float16,float16,127,0.016652800142765045
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,8,128,1,float16,fp8,127,0.00835679993033409
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,8,128,1,float16,float16,255,0.0165583997964859
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,8,128,1,float16,fp8,255,0.008364800363779068
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,8,128,1,float16,float16,511,0.016599999368190767
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,8,128,1,float16,fp8,511,0.00838399976491928
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,8,128,1,float16,float16,1023,0.016612799465656282
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,8,128,1,float16,fp8,1023,0.00854559987783432
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,8,128,1,float16,float16,2047,0.019011199474334717
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,8,128,1,float16,fp8,2047,0.009155199676752091
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,8,128,1,float16,float16,4095,0.02062560021877289
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,64,8,128,1,float16,fp8,4095,0.010582400113344192
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,64,4,128,1,float16,float16,1,0.023193599283695222
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,64,4,128,1,float16,float16,3,0.02287999987602234
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,64,4,128,1,float16,float16,7,0.023217600584030152
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,64,4,128,1,float16,fp8,7,0.01247519999742508
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,64,4,128,1,float16,fp8,15,0.012441600114107132
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,64,4,128,1,float16,float16,15,0.02274879962205887
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,64,4,128,1,float16,fp8,1,0.012532800436019897
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,64,4,128,1,float16,float16,31,0.022771200537681578
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,64,4,128,1,float16,fp8,3,0.012503999471664428
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,64,4,128,1,float16,fp8,31,0.012561599910259246
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,64,4,128,1,float16,float16,63,0.02306559979915619
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,64,4,128,1,float16,fp8,63,0.012665599584579468
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,64,4,128,1,float16,float16,127,0.02282080054283142
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,64,4,128,1,float16,fp8,127,0.012545600533485413
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,64,4,128,1,float16,float16,255,0.022835199534893037
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,64,4,128,1,float16,fp8,255,0.012542399764060973
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,64,4,128,1,float16,float16,511,0.04264479875564575
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,64,4,128,1,float16,fp8,511,0.016641600430011748
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,64,4,128,1,float16,float16,1023,0.06249759793281555
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,64,4,128,1,float16,fp8,1023,0.030870398879051207
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,64,4,128,1,float16,float16,2047,0.09886879920959472
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,64,8,128,1,float16,fp8,3,0.014603200554847717
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,64,8,128,1,float16,float16,7,0.02677919864654541
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,64,8,128,1,float16,float16,15,0.026795199513435362
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,64,4,128,1,float16,fp8,2047,0.05013120174407959
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,64,8,128,1,float16,float16,1,0.02689119875431061
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,64,8,128,1,float16,fp8,1,0.014713600277900696
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,64,8,128,1,float16,float16,3,0.026897600293159483
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,64,8,128,1,float16,fp8,7,0.014552000164985656
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,64,8,128,1,float16,fp8,15,0.014753599464893342
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,64,8,128,1,float16,float16,31,0.02677919864654541
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,64,8,128,1,float16,fp8,31,0.015011200308799743
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,64,8,128,1,float16,float16,63,0.026843199133872987
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,64,8,128,1,float16,fp8,63,0.014703999459743499
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,64,8,128,1,float16,float16,127,0.026849600672721862
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,64,8,128,1,float16,fp8,127,0.01451359987258911
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,64,8,128,1,float16,float16,255,0.041912001371383664
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,64,8,128,1,float16,fp8,255,0.01515199989080429
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,64,8,128,1,float16,float16,511,0.0615119993686676
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,64,8,128,1,float16,fp8,511,0.02924480140209198
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,64,8,128,1,float16,float16,1023,0.0974399983882904
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,64,8,128,1,float16,fp8,1023,0.04927839934825897
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,64,8,128,1,float16,float16,2047,0.17244479656219483
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,64,8,128,1,float16,fp8,2047,0.08672800064086914
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,64,4,128,1,float16,float16,1,0.030924800038337707
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,64,4,128,1,float16,float16,3,0.030865600705146788
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,64,4,128,1,float16,fp8,3,0.017156800627708434
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,64,4,128,1,float16,float16,15,0.03086720108985901
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,64,4,128,1,float16,fp8,7,0.016758400201797485
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,64,4,128,1,float16,fp8,15,0.016857600212097167
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,64,4,128,1,float16,float16,31,0.030995199084281923
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,64,4,128,1,float16,fp8,31,0.01664000004529953
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,64,4,128,1,float16,fp8,1,0.0165583997964859
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,64,4,128,1,float16,float16,63,0.030868801474571227
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,64,4,128,1,float16,fp8,63,0.016638399660587312
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,64,4,128,1,float16,float16,127,0.0325408011674881
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,64,4,128,1,float16,float16,7,0.030958399176597595
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,64,4,128,1,float16,fp8,127,0.01706559956073761
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,64,4,128,1,float16,float16,255,0.04335519969463349
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,64,4,128,1,float16,fp8,255,0.016641600430011748
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,64,4,128,1,float16,float16,511,0.0634447991847992
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,64,4,128,1,float16,fp8,511,0.030771198868751525
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,64,8,128,1,float16,float16,1,0.03707039952278137
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,64,8,128,1,float16,fp8,1,0.023313599824905395
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,64,8,128,1,float16,float16,3,0.03702400028705597
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,64,8,128,1,float16,fp8,3,0.023158399760723113
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,64,8,128,1,float16,float16,7,0.037027201056480406
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,64,8,128,1,float16,fp8,7,0.022784000635147093
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,64,8,128,1,float16,float16,15,0.0370608001947403
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,64,8,128,1,float16,fp8,15,0.02381120026111603
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,64,8,128,1,float16,float16,31,0.03707840144634247
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,64,8,128,1,float16,fp8,31,0.023654399812221526
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,64,8,128,1,float16,float16,63,0.03867999911308288
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,64,8,128,1,float16,fp8,63,0.022896000742912294
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,64,8,128,1,float16,float16,127,0.04509440064430237
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,64,8,128,1,float16,fp8,127,0.023907199501991272
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,64,8,128,1,float16,float16,255,0.06361759901046753
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,64,8,128,1,float16,fp8,255,0.03076159954071045
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,64,8,128,1,float16,float16,511,0.10018240213394165
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,64,8,128,1,float16,fp8,511,0.049211201071739194
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,64,4,128,1,float16,float16,1,0.04516800045967102
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,64,4,128,1,float16,fp8,3,0.026931199431419372
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,64,4,128,1,float16,float16,7,0.04489760100841522
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,64,4,128,1,float16,fp8,1,0.02723200023174286
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,64,4,128,1,float16,fp8,7,0.027463999390602113
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,64,4,128,1,float16,float16,15,0.044491198658943173
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,64,4,128,1,float16,float16,3,0.045230400562286374
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,64,4,128,1,float16,fp8,15,0.02686080038547516
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,64,4,128,1,float16,float16,31,0.04367679953575134
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,64,4,128,1,float16,fp8,31,0.02699359953403473
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,64,4,128,1,float16,float16,63,0.045187199115753175
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,64,4,128,1,float16,fp8,63,0.026976001262664796
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,64,4,128,1,float16,float16,127,0.050518399477005003
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,64,4,128,1,float16,fp8,127,0.028641599416732787
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,64,4,128,1,float16,float16,255,0.06534399986267089
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,64,4,128,1,float16,fp8,255,0.03126559853553772
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,64,8,128,1,float16,float16,7,0.05761600136756897
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,64,8,128,1,float16,float16,1,0.05758399963378906
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,64,8,128,1,float16,fp8,1,0.03918080031871796
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,64,8,128,1,float16,float16,3,0.05761759877204895
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,64,8,128,1,float16,fp8,3,0.04004800021648407
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,64,8,128,1,float16,fp8,7,0.0403903990983963
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,64,8,128,1,float16,float16,15,0.05760319828987122
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,64,8,128,1,float16,fp8,15,0.03918400108814239
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,64,8,128,1,float16,float16,31,0.06205120086669922
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,64,8,128,1,float16,float16,255,0.10329439640045165
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,64,8,128,1,float16,fp8,31,0.039524799585342406
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,64,8,128,1,float16,float16,63,0.06382399797439575
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,64,8,128,1,float16,fp8,63,0.03988479971885681
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,64,8,128,1,float16,float16,127,0.06894720196723939
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,64,8,128,1,float16,fp8,127,0.04210880100727081
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,64,8,128,1,float16,fp8,255,0.051420801877975465
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,4,128,1,float16,float16,1,0.018719999492168425
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,4,128,1,float16,float16,15,0.018628799915313722
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,4,128,1,float16,float16,7,0.016659200191497803
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,4,128,1,float16,fp8,1,0.00843520015478134
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,4,128,1,float16,float16,3,0.017579199373722078
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,4,128,1,float16,float16,63,0.017161600291728973
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,4,128,1,float16,fp8,3,0.008660800009965896
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,4,128,1,float16,fp8,7,0.00854720026254654
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,4,128,1,float16,fp8,15,0.00897120013833046
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,4,128,1,float16,float16,31,0.018681600689888
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,4,128,1,float16,fp8,31,0.008451200276613235
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,4,128,1,float16,fp8,63,0.008446399867534638
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,4,128,1,float16,float16,127,0.0181536003947258
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,4,128,1,float16,float16,1023,0.01863040030002594
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,4,128,1,float16,fp8,127,0.009151999652385712
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,4,128,1,float16,float16,255,0.01823360025882721
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,4,128,1,float16,fp8,255,0.00910080000758171
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,4,128,1,float16,float16,511,0.018607999384403228
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,4,128,1,float16,fp8,511,0.008449599891901017
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,4,128,1,float16,fp8,1023,0.008475200086832047
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,4,128,1,float16,float16,2047,0.018688000738620758
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,4,128,1,float16,fp8,2047,0.010406400263309478
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,4,128,1,float16,float16,4095,0.022697600722312927
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,4,128,1,float16,fp8,4095,0.012455999851226807
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,8,128,1,float16,float16,1,0.01857759952545166
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,8,128,1,float16,fp8,1,0.008388800173997879
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,8,128,1,float16,float16,3,0.018572799861431122
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,8,128,1,float16,fp8,3,0.008449599891901017
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,8,128,1,float16,float16,7,0.01706079989671707
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,8,128,1,float16,fp8,7,0.00835679993033409
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,8,128,1,float16,float16,15,0.01857440024614334
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,8,128,1,float16,fp8,127,0.00841120034456253
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,8,128,1,float16,fp8,15,0.008507200330495835
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,8,128,1,float16,float16,31,0.018697600066661834
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,8,128,1,float16,fp8,31,0.00843840017914772
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,8,128,1,float16,float16,63,0.018372799456119537
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,8,128,1,float16,fp8,511,0.008369600027799606
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,8,128,1,float16,fp8,63,0.008416000008583068
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,8,128,1,float16,float16,127,0.017136000096797943
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,8,128,1,float16,float16,255,0.017385600507259368
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,8,128,1,float16,fp8,255,0.008544000238180161
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,8,128,1,float16,float16,511,0.016624000668525696
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,8,128,1,float16,float16,1023,0.01868959963321686
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,8,128,1,float16,fp8,1023,0.00852160006761551
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,8,128,1,float16,float16,2047,0.020729599893093108
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,8,128,1,float16,fp8,2047,0.01048320010304451
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,8,128,1,float16,float16,4095,0.04055359959602356
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,64,8,128,1,float16,fp8,4095,0.013310399651527405
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,64,4,128,1,float16,float16,1,0.06859520077705383
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,64,4,128,1,float16,fp8,7,0.04755200147628784
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,64,4,128,1,float16,fp8,3,0.04751839935779571
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,64,4,128,1,float16,float16,15,0.0709775984287262
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,64,4,128,1,float16,fp8,1,0.047491198778152464
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,64,4,128,1,float16,fp8,15,0.04795039892196655
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,64,4,128,1,float16,float16,3,0.06978880167007447
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,64,4,128,1,float16,float16,7,0.06893439888954163
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,64,4,128,1,float16,float16,31,0.07416960000991821
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,64,4,128,1,float16,fp8,31,0.04750080108642578
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,64,4,128,1,float16,float16,63,0.07625120282173156
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,64,4,128,1,float16,fp8,63,0.04752799868583679
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,64,4,128,1,float16,float16,127,0.07582719922065735
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,64,4,128,1,float16,fp8,127,0.04942240118980408
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,64,8,128,1,float16,float16,1,0.09994879961013795
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,64,8,128,1,float16,fp8,1,0.07214239835739136
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,64,8,128,1,float16,float16,3,0.10083680152893067
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,64,8,128,1,float16,fp8,3,0.07220159769058228
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,64,8,128,1,float16,float16,7,0.10480159521102905
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,64,8,128,1,float16,fp8,7,0.07234079837799072
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,64,8,128,1,float16,float16,15,0.10774719715118408
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,64,8,128,1,float16,fp8,15,0.07236800193786622
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,64,8,128,1,float16,float16,31,0.1086575984954834
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,64,8,128,1,float16,fp8,31,0.07398719787597656
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,64,8,128,1,float16,float16,63,0.10879839658737182
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,64,8,128,1,float16,fp8,63,0.07401760220527649
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,64,8,128,1,float16,float16,127,0.11726080179214478
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,64,8,128,1,float16,fp8,127,0.07429919838905334
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,64,4,128,1,float16,float16,3,0.12874720096588135
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,64,4,128,1,float16,float16,7,0.13104000091552734
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,64,4,128,1,float16,fp8,7,0.0885263979434967
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,64,4,128,1,float16,fp8,1,0.0883296012878418
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,64,4,128,1,float16,float16,15,0.13213440179824829
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,64,4,128,1,float16,fp8,3,0.0884335994720459
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,64,4,128,1,float16,float16,1,0.12755199670791625
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,64,4,128,1,float16,fp8,15,0.08862720131874084
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,64,4,128,1,float16,float16,31,0.13184800148010253
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,64,4,128,1,float16,fp8,31,0.08975039720535279
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,64,4,128,1,float16,float16,63,0.12968000173568725
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,64,4,128,1,float16,fp8,63,0.08886399865150452
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,64,8,128,1,float16,float16,1,0.18899519443511964
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,64,8,128,1,float16,fp8,1,0.13744479417800903
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,64,8,128,1,float16,float16,3,0.19118560552597047
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,64,8,128,1,float16,fp8,3,0.13757120370864867
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,64,8,128,1,float16,fp8,7,0.13842240571975709
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,64,8,128,1,float16,float16,7,0.19242559671401976
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,64,8,128,1,float16,float16,15,0.19143840074539184
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,64,8,128,1,float16,fp8,15,0.13835519552230835
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,64,8,128,1,float16,float16,31,0.19231040477752687
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,64,8,128,1,float16,fp8,31,0.1377776026725769
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,64,8,128,1,float16,float16,63,0.19275519847869874
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,64,8,128,1,float16,fp8,63,0.1389631986618042
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,4,128,1,float16,fp8,3,0.008455999940633774
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,4,128,1,float16,float16,3,0.018649600446224213
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,4,128,1,float16,float16,7,0.01870719939470291
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,4,128,1,float16,float16,1,0.018668800592422485
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,4,128,1,float16,fp8,31,0.008560000360012055
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,4,128,1,float16,fp8,7,0.009728000313043595
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,4,128,1,float16,float16,15,0.018614399433135986
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,4,128,1,float16,fp8,1,0.00944959968328476
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,4,128,1,float16,fp8,15,0.008430399745702744
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,4,128,1,float16,float16,31,0.018719999492168425
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,4,128,1,float16,float16,63,0.01868479996919632
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,4,128,1,float16,fp8,63,0.008560000360012055
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,4,128,1,float16,float16,127,0.018585599958896637
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,4,128,1,float16,fp8,127,0.009782399982213974
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,4,128,1,float16,float16,255,0.018676799535751343
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,4,128,1,float16,fp8,255,0.009054400026798248
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,4,128,1,float16,float16,511,0.01867839992046356
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,4,128,1,float16,fp8,511,0.008505599945783615
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,4,128,1,float16,float16,1023,0.019601599872112276
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,8,128,1,float16,float16,1,0.016684800386428833
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,4,128,1,float16,fp8,1023,0.010422399640083313
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,4,128,1,float16,float16,2047,0.021478399634361267
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,4,128,1,float16,fp8,2047,0.012415999919176102
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,4,128,1,float16,float16,4095,0.04153920114040375
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,8,128,1,float16,fp8,7,0.00833119973540306
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,4,128,1,float16,fp8,4095,0.015220800042152404
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,8,128,1,float16,fp8,15,0.008342400193214417
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,8,128,1,float16,float16,3,0.016739200055599212
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,8,128,1,float16,fp8,1,0.008371199667453765
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,8,128,1,float16,fp8,3,0.008423999696969987
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,8,128,1,float16,float16,7,0.016655999422073364
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,8,128,1,float16,float16,31,0.016649599373340606
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,8,128,1,float16,float16,15,0.017347200214862822
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,8,128,1,float16,fp8,31,0.008318399637937545
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,8,128,1,float16,float16,63,0.016646400094032288
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,8,128,1,float16,fp8,63,0.008382400125265121
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,8,128,1,float16,float16,127,0.0165583997964859
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,8,128,1,float16,fp8,127,0.008372800052165985
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,8,128,1,float16,fp8,255,0.008416000008583068
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,8,128,1,float16,float16,255,0.016603200137615202
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,8,128,1,float16,float16,511,0.01863519996404648
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,8,128,1,float16,fp8,511,0.008430399745702744
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,8,128,1,float16,float16,1023,0.020739200711250304
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,8,128,1,float16,fp8,1023,0.010411199927330018
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,8,128,1,float16,float16,2047,0.041222399473190306
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,8,128,1,float16,fp8,2047,0.012476799637079239
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,8,128,1,float16,float16,4095,0.059515202045440675
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,2,128,1,float16,float16,3,0.016865600645542145
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,64,8,128,1,float16,fp8,4095,0.02858879864215851
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,2,128,1,float16,float16,1,0.01675360053777695
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,2,128,1,float16,fp8,3,0.008340799808502197
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,2,128,1,float16,fp8,31,0.008352000266313553
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,2,128,1,float16,float16,7,0.016649599373340606
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,2,128,1,float16,fp8,1,0.008395200222730636
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,2,128,1,float16,fp8,63,0.00835679993033409
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,2,128,1,float16,float16,15,0.01669919937849045
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,2,128,1,float16,fp8,127,0.008364800363779068
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,2,128,1,float16,fp8,15,0.008342400193214417
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,2,128,1,float16,fp8,7,0.008419200032949447
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,2,128,1,float16,float16,31,0.016832000017166136
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,2,128,1,float16,float16,63,0.018585599958896637
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,2,128,1,float16,float16,127,0.016948799788951873
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,2,128,1,float16,float16,255,0.016654400527477263
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,2,128,1,float16,fp8,255,0.008374399691820144
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,2,128,1,float16,float16,511,0.018702399730682374
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,2,128,1,float16,fp8,511,0.009569600224494934
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,2,128,1,float16,float16,1023,0.02077919989824295
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,2,128,1,float16,fp8,1023,0.010424000024795533
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,2,128,1,float16,float16,2047,0.04110400080680847
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,2,128,1,float16,fp8,2047,0.01449279934167862
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,2,128,1,float16,float16,4095,0.05960000157356262
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,2,128,1,float16,fp8,4095,0.028944000601768494
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,2,128,1,float16,float16,8191,0.09518240094184875
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,2,128,1,float16,fp8,8191,0.047121599316596985
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,4,128,1,float16,float16,1,0.0187376007437706
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,4,128,1,float16,float16,15,0.01865919977426529
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,4,128,1,float16,fp8,1,0.008588799834251403
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,4,128,1,float16,float16,3,0.018595199286937713
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,4,128,1,float16,fp8,3,0.008463999629020691
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,4,128,1,float16,float16,7,0.018607999384403228
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,4,128,1,float16,fp8,7,0.008455999940633774
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,4,128,1,float16,fp8,15,0.008563199639320373
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,4,128,1,float16,float16,31,0.01880960017442703
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,4,128,1,float16,fp8,31,0.008668799698352814
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,4,128,1,float16,float16,63,0.018692800402641298
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,4,128,1,float16,fp8,63,0.00865119993686676
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,4,128,1,float16,float16,127,0.018588800728321076
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,4,128,1,float16,fp8,127,0.00843999981880188
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,4,128,1,float16,float16,255,0.018699200451374055
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,4,128,1,float16,fp8,255,0.008737599849700928
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,4,128,1,float16,float16,511,0.0206496000289917
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,4,128,1,float16,fp8,511,0.010572800040245056
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,4,128,1,float16,float16,1023,0.04114879965782166
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,4,128,1,float16,fp8,1023,0.014480000734329224
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,4,128,1,float16,float16,2047,0.059622400999069215
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,4,128,1,float16,fp8,2047,0.028697600960731505
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,4,128,1,float16,float16,4095,0.09663040041923524
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,4,128,1,float16,fp8,4095,0.047337600588798524
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,8,128,1,float16,float16,1,0.02136480063199997
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,4,128,1,float16,float16,8191,0.17002719640731812
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,8,128,1,float16,fp8,1,0.010491199791431427
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,4,128,1,float16,fp8,8191,0.08481280207633972
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,8,128,1,float16,float16,3,0.020759999752044678
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,8,128,1,float16,fp8,3,0.011396799981594086
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,8,128,1,float16,float16,7,0.020720000565052032
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,8,128,1,float16,fp8,7,0.010619200021028518
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,8,128,1,float16,float16,15,0.0212351992726326
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,8,128,1,float16,fp8,15,0.010499200224876404
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,8,128,1,float16,float16,31,0.020660799741744996
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,8,128,1,float16,fp8,31,0.01101280003786087
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,8,128,1,float16,float16,63,0.021593600511550903
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,8,128,1,float16,fp8,63,0.010572800040245056
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,8,128,1,float16,float16,127,0.02075839936733246
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,8,128,1,float16,float16,255,0.02131199985742569
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,8,128,1,float16,fp8,127,0.010969600081443787
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,8,128,1,float16,fp8,255,0.010608000308275222
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,8,128,1,float16,float16,511,0.041212800145149234
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,8,128,1,float16,fp8,511,0.014814400672912597
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,8,128,1,float16,float16,1023,0.060451197624206546
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,8,128,1,float16,fp8,1023,0.029552000761032104
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,8,128,1,float16,float16,2047,0.09662240147590637
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,8,128,1,float16,fp8,2047,0.04860639870166779
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,8,128,1,float16,float16,4095,0.17206560373306273
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,8,128,1,float16,fp8,4095,0.08764960169792176
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,8,128,1,float16,fp8,8191,0.1644719958305359
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,32,8,128,1,float16,float16,8191,0.3201391935348511
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,2,128,1,float16,float16,1,0.016663999855518342
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,2,128,1,float16,fp8,1,0.009020800143480301
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,2,128,1,float16,float16,7,0.016646400094032288
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,2,128,1,float16,float16,3,0.01656000018119812
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,2,128,1,float16,fp8,7,0.008427199721336365
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,2,128,1,float16,fp8,3,0.010449600219726563
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,2,128,1,float16,fp8,63,0.008476799726486206
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,2,128,1,float16,float16,15,0.01664319932460785
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,2,128,1,float16,fp8,15,0.010382399708032609
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,2,128,1,float16,float16,31,0.014571200311183929
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,2,128,1,float16,fp8,31,0.010395199805498124
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,2,128,1,float16,float16,63,0.016574400663375854
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,2,128,1,float16,float16,127,0.016627199947834015
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,2,128,1,float16,fp8,127,0.00846880003809929
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,2,128,1,float16,fp8,255,0.010411199927330018
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,2,128,1,float16,float16,511,0.014593599736690522
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,2,128,1,float16,float16,255,0.016627199947834015
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,2,128,1,float16,fp8,511,0.010529600083827972
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,2,128,1,float16,float16,1023,0.016598400473594666
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,2,128,1,float16,fp8,1023,0.009419199824333192
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,2,128,1,float16,float16,2047,0.016731199622154237
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,2,128,1,float16,fp8,2047,0.010409600287675857
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,2,128,1,float16,float16,4095,0.016763199865818024
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,2,128,1,float16,float16,8191,0.020732800662517547
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,2,128,1,float16,fp8,8191,0.014630399644374847
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,2,128,1,float16,fp8,4095,0.01242400035262108
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,4,128,1,float16,fp8,7,0.008444800227880477
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,4,128,1,float16,float16,1,0.018691200017929076
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,4,128,1,float16,fp8,1,0.008416000008583068
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,4,128,1,float16,float16,3,0.01857440024614334
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,4,128,1,float16,fp8,3,0.00846560001373291
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,4,128,1,float16,float16,7,0.0186256006360054
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,4,128,1,float16,float16,15,0.018588800728321076
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,4,128,1,float16,fp8,15,0.008449599891901017
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,4,128,1,float16,float16,31,0.01857600063085556
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,4,128,1,float16,fp8,31,0.008423999696969987
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,4,128,1,float16,float16,63,0.018585599958896637
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,4,128,1,float16,fp8,63,0.008463999629020691
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,4,128,1,float16,float16,127,0.01664479970932007
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,4,128,1,float16,fp8,127,0.008387199789285659
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,4,128,1,float16,float16,255,0.01863519996404648
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,4,128,1,float16,fp8,255,0.008475200086832047
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,4,128,1,float16,float16,511,0.018593600392341612
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,4,128,1,float16,fp8,511,0.008419200032949447
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,4,128,1,float16,float16,1023,0.0187376007437706
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,4,128,1,float16,fp8,1023,0.008454400300979614
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,4,128,1,float16,float16,2047,0.01789119988679886
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,4,128,1,float16,fp8,2047,0.010384000092744827
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,4,128,1,float16,float16,4095,0.02075839936733246
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,4,128,1,float16,fp8,4095,0.010499200224876404
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,4,128,1,float16,float16,8191,0.022878399491310118
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,4,128,1,float16,fp8,8191,0.012564800679683685
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,8,128,1,float16,float16,1,0.018569600582122803
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,8,128,1,float16,fp8,1,0.008374399691820144
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,8,128,1,float16,float16,3,0.016542400419712066
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,8,128,1,float16,fp8,3,0.008718399703502655
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,8,128,1,float16,float16,7,0.018593600392341612
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,8,128,1,float16,fp8,7,0.008708799630403519
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,8,128,1,float16,float16,127,0.018592000007629395
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,8,128,1,float16,float16,15,0.01874080002307892
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,8,128,1,float16,fp8,127,0.008497600257396699
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,8,128,1,float16,fp8,15,0.008396799862384795
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,8,128,1,float16,float16,31,0.018676799535751343
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,8,128,1,float16,fp8,31,0.008472000062465668
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,8,128,1,float16,float16,63,0.01653439998626709
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,8,128,1,float16,fp8,63,0.008444800227880477
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,8,128,1,float16,float16,255,0.018622399866580965
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,8,128,1,float16,fp8,255,0.00856959968805313
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,8,128,1,float16,float16,511,0.01868479996919632
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,8,128,1,float16,fp8,511,0.00852160006761551
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,8,128,1,float16,float16,1023,0.018692800402641298
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,8,128,1,float16,fp8,1023,0.008457600325345992
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,8,128,1,float16,float16,2047,0.018611200153827667
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,8,128,1,float16,fp8,2047,0.010387200117111205
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,8,128,1,float16,float16,4095,0.02064799964427948
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,8,128,1,float16,fp8,4095,0.010486400127410889
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,8,128,1,float16,float16,8191,0.022144000232219695
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,32,8,128,1,float16,fp8,8191,0.012529599666595458
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,2,128,1,float16,float16,1,0.018561600148677825
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,2,128,1,float16,fp8,1,0.010388799756765366
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,2,128,1,float16,fp8,3,0.010384000092744827
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,2,128,1,float16,fp8,7,0.010313600301742554
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,2,128,1,float16,float16,7,0.018721599876880646
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,2,128,1,float16,float16,3,0.019529600441455842
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,2,128,1,float16,float16,15,0.018780800700187682
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,2,128,1,float16,fp8,15,0.008750399947166443
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,2,128,1,float16,float16,31,0.018755200505256652
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,2,128,1,float16,float16,255,0.01764799952507019
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,2,128,1,float16,fp8,31,0.010369600355625152
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,2,128,1,float16,float16,63,0.019038400053977965
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,2,128,1,float16,fp8,63,0.010384000092744827
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,2,128,1,float16,float16,127,0.020623999834060668
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,2,128,1,float16,fp8,127,0.010467199981212616
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,2,128,1,float16,float16,2047,0.020612800121307374
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,2,128,1,float16,float16,511,0.018615999817848207
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,2,128,1,float16,fp8,511,0.01055999994277954
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,2,128,1,float16,fp8,255,0.010449600219726563
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,2,128,1,float16,float16,1023,0.020665599405765532
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,2,128,1,float16,fp8,1023,0.0104032002389431
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,2,128,1,float16,fp8,2047,0.010596799850463866
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,4,128,1,float16,fp8,1,0.008646400272846222
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,2,128,1,float16,float16,4095,0.020739200711250304
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,2,128,1,float16,fp8,4095,0.012163200229406358
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,2,128,1,float16,float16,8191,0.022815999388694764
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,2,128,1,float16,fp8,8191,0.014478400349617004
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,4,128,1,float16,float16,1,0.018668800592422485
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,4,128,1,float16,float16,3,0.016737599670886994
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,4,128,1,float16,fp8,3,0.008505599945783615
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,4,128,1,float16,float16,7,0.018571199476718904
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,4,128,1,float16,fp8,7,0.008366400003433227
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,4,128,1,float16,float16,15,0.016598400473594666
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,4,128,1,float16,fp8,15,0.008448000252246856
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,4,128,1,float16,fp8,127,0.008425600081682205
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,4,128,1,float16,float16,31,0.01871040016412735
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,4,128,1,float16,fp8,31,0.008452799916267396
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,4,128,1,float16,float16,63,0.01857919991016388
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,4,128,1,float16,fp8,63,0.00835679993033409
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,4,128,1,float16,float16,127,0.016655999422073364
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,4,128,1,float16,float16,255,0.01863359957933426
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,4,128,1,float16,fp8,255,0.008495999872684479
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,4,128,1,float16,float16,511,0.018628799915313722
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,4,128,1,float16,fp8,511,0.008422400057315826
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,4,128,1,float16,float16,1023,0.018592000007629395
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,4,128,1,float16,fp8,1023,0.00835999995470047
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,4,128,1,float16,float16,2047,0.018764799833297728
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,4,128,1,float16,fp8,2047,0.010401599854230881
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,4,128,1,float16,fp8,4095,0.010502400249242783
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,4,128,1,float16,float16,4095,0.02075839936733246
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,4,128,1,float16,float16,8191,0.022716799378395082
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,4,128,1,float16,fp8,8191,0.012604799866676331
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,8,128,1,float16,float16,15,0.018611200153827667
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,8,128,1,float16,float16,1,0.018606400489807128
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,8,128,1,float16,float16,31,0.018606400489807128
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,8,128,1,float16,fp8,1,0.008510400354862214
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,8,128,1,float16,float16,3,0.016659200191497803
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,8,128,1,float16,fp8,3,0.008428800106048583
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,8,128,1,float16,float16,7,0.016625599563121797
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,8,128,1,float16,fp8,7,0.008425600081682205
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,8,128,1,float16,fp8,15,0.008419200032949447
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,8,128,1,float16,fp8,31,0.00835679993033409
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,8,128,1,float16,float16,63,0.018590399622917177
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,8,128,1,float16,fp8,63,0.008441600203514098
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,8,128,1,float16,float16,127,0.016572800278663636
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,8,128,1,float16,fp8,127,0.008398400247097015
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,8,128,1,float16,float16,255,0.016737599670886994
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,8,128,1,float16,fp8,255,0.008408000320196151
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,8,128,1,float16,float16,511,0.01664000004529953
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,8,128,1,float16,fp8,511,0.008430399745702744
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,8,128,1,float16,float16,1023,0.01867839992046356
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,8,128,1,float16,fp8,1023,0.008561599999666214
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,8,128,1,float16,float16,2047,0.018603199720382692
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,8,128,1,float16,fp8,2047,0.010366400331258773
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,8,128,1,float16,float16,4095,0.020846399664878845
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,8,128,1,float16,fp8,4095,0.01050880029797554
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,8,128,1,float16,float16,8191,0.022711999714374542
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,32,8,128,1,float16,fp8,8191,0.012556800246238708
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,2,128,1,float16,fp8,1,0.00852160006761551
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,2,128,1,float16,fp8,3,0.010134399682283402
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,2,128,1,float16,fp8,7,0.008382400125265121
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,2,128,1,float16,float16,1,0.01860159933567047
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,2,128,1,float16,float16,15,0.01860480010509491
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,2,128,1,float16,float16,7,0.01858240067958832
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,2,128,1,float16,float16,3,0.01857759952545166
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,2,128,1,float16,fp8,15,0.01034879982471466
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,2,128,1,float16,float16,31,0.018676799535751343
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,2,128,1,float16,fp8,31,0.008852799981832504
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,2,128,1,float16,float16,63,0.018665599822998046
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,2,128,1,float16,fp8,63,0.009203200042247773
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,2,128,1,float16,float16,127,0.018643200397491455
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,2,128,1,float16,fp8,127,0.008747199922800064
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,2,128,1,float16,float16,255,0.018721599876880646
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,2,128,1,float16,fp8,255,0.00921119973063469
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,2,128,1,float16,float16,511,0.018699200451374055
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,2,128,1,float16,fp8,511,0.00857279971241951
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,2,128,1,float16,float16,1023,0.018668800592422485
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,2,128,1,float16,fp8,1023,0.010422399640083313
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,2,128,1,float16,float16,2047,0.02144159972667694
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,2,128,1,float16,fp8,2047,0.012435200065374375
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,2,128,1,float16,float16,4095,0.0412559986114502
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,2,128,1,float16,fp8,4095,0.01610880047082901
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,2,128,1,float16,float16,8191,0.05965920090675354
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,4,128,1,float16,float16,7,0.016707199811935424
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,2,128,1,float16,fp8,8191,0.029116800427436827
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,4,128,1,float16,float16,1,0.0165583997964859
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,4,128,1,float16,fp8,1,0.008369600027799606
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,4,128,1,float16,float16,3,0.01666239947080612
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,4,128,1,float16,fp8,3,0.008449599891901017
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,4,128,1,float16,fp8,7,0.00835999995470047
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,4,128,1,float16,float16,15,0.016676799952983858
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,4,128,1,float16,fp8,15,0.00835840031504631
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,4,128,1,float16,float16,31,0.01672479957342148
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,4,128,1,float16,fp8,31,0.008315200358629227
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,4,128,1,float16,float16,63,0.016791999340057373
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,4,128,1,float16,float16,127,0.01671999990940094
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,4,128,1,float16,fp8,63,0.008336000144481659
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,4,128,1,float16,fp8,127,0.00833280012011528
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,4,128,1,float16,float16,255,0.01664000004529953
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,4,128,1,float16,fp8,255,0.0083856001496315
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,4,128,1,float16,float16,511,0.018671999871730804
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,4,128,1,float16,fp8,511,0.008452799916267396
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,4,128,1,float16,float16,1023,0.020609599351882935
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,4,128,1,float16,fp8,1023,0.010521599650382995
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,4,128,1,float16,float16,2047,0.04087679982185364
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,4,128,1,float16,fp8,2047,0.012670400738716125
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,4,128,1,float16,float16,4095,0.059006398916244505
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,4,128,1,float16,fp8,4095,0.028987199068069458
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,4,128,1,float16,float16,8191,0.09477919936180115
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,4,128,1,float16,fp8,8191,0.04623039960861206
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,8,128,1,float16,float16,1,0.01857440024614334
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,8,128,1,float16,fp8,1,0.00844319984316826
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,8,128,1,float16,float16,3,0.018716800212860107
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,8,128,1,float16,fp8,31,0.008532799780368805
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,8,128,1,float16,fp8,3,0.008523199707269669
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,8,128,1,float16,float16,7,0.018739199638366698
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,8,128,1,float16,fp8,7,0.008667200058698653
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,8,128,1,float16,float16,15,0.018596799671649934
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,8,128,1,float16,fp8,15,0.008558399975299835
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,8,128,1,float16,float16,31,0.01857600063085556
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,8,128,1,float16,float16,63,0.018671999871730804
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,8,128,1,float16,fp8,63,0.008588799834251403
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,8,128,1,float16,float16,127,0.018572799861431122
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,8,128,1,float16,fp8,127,0.008491200208663941
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,8,128,1,float16,float16,255,0.018571199476718904
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,8,128,1,float16,fp8,255,0.008451200276613235
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,8,128,1,float16,float16,511,0.020819200575351714
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,8,128,1,float16,fp8,511,0.010539200156927109
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,8,128,1,float16,float16,1023,0.04092960059642792
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,8,128,1,float16,fp8,1023,0.01313599944114685
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,8,128,1,float16,float16,2047,0.05960639715194702
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,8,128,1,float16,fp8,2047,0.028839999437332155
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,8,128,1,float16,float16,4095,0.09631680250167847
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,8,128,1,float16,fp8,4095,0.048583999276161194
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,8,128,1,float16,float16,8191,0.17012959718704224
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,32,8,128,1,float16,fp8,8191,0.08824959993362427
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,2,128,1,float16,float16,7,0.01900160014629364
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,2,128,1,float16,float16,1,0.018777599930763243
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,2,128,1,float16,fp8,1,0.01053759977221489
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,2,128,1,float16,float16,3,0.01860959976911545
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,2,128,1,float16,fp8,7,0.008595199882984161
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,2,128,1,float16,fp8,3,0.010444799810647965
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,2,128,1,float16,fp8,63,0.008659200370311737
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,2,128,1,float16,float16,15,0.018606400489807128
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,2,128,1,float16,fp8,15,0.009753599762916565
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,2,128,1,float16,float16,255,0.018559999763965607
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,2,128,1,float16,float16,31,0.018585599958896637
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,2,128,1,float16,fp8,31,0.010425599664449692
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,2,128,1,float16,float16,63,0.018644799292087556
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,2,128,1,float16,float16,1023,0.01876319944858551
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,2,128,1,float16,float16,127,0.01866399943828583
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,2,128,1,float16,fp8,127,0.009468799829483033
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,2,128,1,float16,fp8,255,0.010196799784898758
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,2,128,1,float16,float16,511,0.018785600364208222
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,2,128,1,float16,fp8,511,0.010412800312042236
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,2,128,1,float16,fp8,1023,0.010547199845314026
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,2,128,1,float16,float16,2047,0.020689600706100465
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,2,128,1,float16,fp8,2047,0.010574399679899215
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,2,128,1,float16,float16,4095,0.02064799964427948
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,2,128,1,float16,float16,8191,0.022752000391483305
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,2,128,1,float16,fp8,8191,0.014220799505710601
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,4,128,1,float16,float16,7,0.016633599996566772
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,4,128,1,float16,float16,1,0.018606400489807128
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,2,128,1,float16,fp8,4095,0.012492799758911132
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,4,128,1,float16,fp8,1,0.00835679993033409
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,4,128,1,float16,float16,3,0.01857759952545166
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,4,128,1,float16,fp8,3,0.008355200290679932
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,4,128,1,float16,fp8,7,0.008414400368928909
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,4,128,1,float16,fp8,63,0.008659200370311737
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,4,128,1,float16,float16,15,0.01675039976835251
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,4,128,1,float16,float16,127,0.016638399660587312
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,4,128,1,float16,fp8,15,0.008369600027799606
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,4,128,1,float16,float16,31,0.017796799540519714
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,4,128,1,float16,fp8,31,0.008489599823951722
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,4,128,1,float16,float16,63,0.017731200158596038
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,4,128,1,float16,fp8,127,0.008419200032949447
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,4,128,1,float16,float16,255,0.01663520038127899
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,4,128,1,float16,fp8,255,0.0083856001496315
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,4,128,1,float16,float16,511,0.01666080057621002
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,4,128,1,float16,fp8,511,0.008588799834251403
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,4,128,1,float16,float16,1023,0.018620799481868743
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,4,128,1,float16,fp8,1023,0.008484800159931184
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,4,128,1,float16,float16,2047,0.018745599687099455
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,4,128,1,float16,fp8,2047,0.010398399829864503
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,4,128,1,float16,float16,4095,0.020849600434303284
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,4,128,1,float16,fp8,4095,0.01080000028014183
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,4,128,1,float16,float16,8191,0.02319999933242798
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,4,128,1,float16,fp8,8191,0.012612800300121307
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,8,128,1,float16,float16,1,0.01656160056591034
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,8,128,1,float16,fp8,1,0.00840959995985031
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,8,128,1,float16,float16,3,0.01658399999141693
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,8,128,1,float16,fp8,3,0.008376000076532364
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,8,128,1,float16,float16,7,0.016545599699020384
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,8,128,1,float16,fp8,7,0.008401600271463394
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,8,128,1,float16,float16,15,0.016651199758052827
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,8,128,1,float16,fp8,15,0.00835679993033409
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,8,128,1,float16,float16,31,0.016568000614643096
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,8,128,1,float16,fp8,31,0.008339200168848038
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,8,128,1,float16,float16,63,0.016641600430011748
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,8,128,1,float16,fp8,63,0.008374399691820144
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,8,128,1,float16,float16,127,0.016582399606704712
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,8,128,1,float16,fp8,127,0.0083856001496315
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,8,128,1,float16,float16,255,0.016625599563121797
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,8,128,1,float16,fp8,255,0.00835840031504631
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,8,128,1,float16,float16,511,0.016758400201797485
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,8,128,1,float16,fp8,511,0.008371199667453765
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,8,128,1,float16,float16,1023,0.01857600063085556
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,8,128,1,float16,fp8,1023,0.008367999643087386
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,8,128,1,float16,float16,2047,0.018675200641155243
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,8,128,1,float16,fp8,2047,0.00902400016784668
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,8,128,1,float16,float16,4095,0.020684799551963805
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,8,128,1,float16,fp8,4095,0.01053759977221489
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,8,128,1,float16,float16,8191,0.04121919870376587
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,32,8,128,1,float16,fp8,8191,0.014569599926471711
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,2,128,1,float16,float16,7,0.01868479996919632
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,2,128,1,float16,fp8,7,0.01048320010304451
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,2,128,1,float16,float16,15,0.01890240013599396
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,2,128,1,float16,float16,1,0.018782399594783783
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,2,128,1,float16,fp8,1,0.010406400263309478
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,2,128,1,float16,float16,3,0.018775999546051025
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,2,128,1,float16,fp8,3,0.010390400141477584
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,2,128,1,float16,fp8,15,0.010425599664449692
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,2,128,1,float16,float16,31,0.019704000651836397
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,2,128,1,float16,fp8,31,0.010416000336408614
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,2,128,1,float16,float16,63,0.020160000026226043
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,2,128,1,float16,fp8,63,0.010443200170993806
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,2,128,1,float16,float16,127,0.018900799751281738
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,2,128,1,float16,fp8,127,0.01043199971318245
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,2,128,1,float16,float16,255,0.019705599546432494
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,2,128,1,float16,fp8,255,0.010454399883747101
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,2,128,1,float16,float16,511,0.022681599855422972
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,2,128,1,float16,fp8,511,0.012452799826860428
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,2,128,1,float16,float16,1023,0.041126400232315063
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,2,128,1,float16,fp8,1023,0.016598400473594666
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,2,128,1,float16,float16,2047,0.06034719944000244
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,2,128,1,float16,fp8,2047,0.02971999943256378
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,2,128,1,float16,float16,4095,0.0964031994342804
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,2,128,1,float16,fp8,4095,0.04918720126152039
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,4,128,1,float16,float16,1,0.02088640034198761
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,4,128,1,float16,fp8,1,0.010824000090360641
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,4,128,1,float16,float16,3,0.021665599942207337
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,4,128,1,float16,fp8,3,0.0105103999376297
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,4,128,1,float16,float16,7,0.022089600563049316
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,4,128,1,float16,fp8,7,0.010520000010728836
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,4,128,1,float16,float16,15,0.02072319984436035
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,4,128,1,float16,fp8,15,0.010515200346708298
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,4,128,1,float16,float16,31,0.021652799844741822
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,4,128,1,float16,fp8,31,0.011164800077676774
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,4,128,1,float16,float16,63,0.02261279970407486
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,4,128,1,float16,fp8,63,0.010604800283908844
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,4,128,1,float16,fp8,127,0.01058880016207695
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,4,128,1,float16,float16,127,0.021540799736976625
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,4,128,1,float16,float16,255,0.0217631995677948
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,4,128,1,float16,fp8,255,0.010590399801731109
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,4,128,1,float16,float16,511,0.041335999965667725
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,4,128,1,float16,fp8,511,0.014593599736690522
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,4,128,1,float16,float16,1023,0.061592000722885135
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,4,128,1,float16,fp8,1023,0.02890399992465973
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,4,128,1,float16,float16,2047,0.09834079742431641
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,4,128,1,float16,fp8,2047,0.048091199994087216
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,8,128,1,float16,float16,1,0.026796799898147584
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,4,128,1,float16,float16,4095,0.17300000190734863
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,4,128,1,float16,fp8,4095,0.08483039736747741
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,8,128,1,float16,fp8,1,0.014582400023937226
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,8,128,1,float16,float16,3,0.026791998744010927
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,8,128,1,float16,fp8,3,0.01467359960079193
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,8,128,1,float16,float16,7,0.02677760124206543
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,8,128,1,float16,fp8,7,0.014689600467681885
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,8,128,1,float16,float16,15,0.026811200380325317
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,8,128,1,float16,fp8,15,0.015048000216484069
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,8,128,1,float16,float16,31,0.02683199942111969
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,8,128,1,float16,fp8,31,0.014667199552059173
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,8,128,1,float16,float16,63,0.02687999904155731
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,8,128,1,float16,fp8,63,0.014635199308395385
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,8,128,1,float16,float16,127,0.026766398549079896
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,8,128,1,float16,float16,1023,0.09671199917793274
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,8,128,1,float16,fp8,127,0.014635199308395385
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,8,128,1,float16,float16,255,0.04131200015544891
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,8,128,1,float16,fp8,255,0.015326400101184846
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,8,128,1,float16,float16,511,0.06083999872207642
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,8,128,1,float16,fp8,511,0.02890079915523529
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,8,128,1,float16,fp8,1023,0.04901440143585205
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,8,128,1,float16,float16,2047,0.16977759599685668
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,8,128,1,float16,fp8,2047,0.08780480027198792
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,8,128,1,float16,float16,4095,0.3152143955230713
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,32,8,128,1,float16,fp8,4095,0.16478079557418823
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,2,128,1,float16,float16,1,0.024217599630355836
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,2,128,1,float16,float16,7,0.024718399345874786
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,2,128,1,float16,fp8,1,0.012603199481964112
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,2,128,1,float16,float16,3,0.024135999381542206
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,2,128,1,float16,fp8,31,0.01255359947681427
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,2,128,1,float16,fp8,3,0.012513600289821625
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,2,128,1,float16,fp8,7,0.012427199631929398
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,2,128,1,float16,float16,15,0.02428639978170395
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,2,128,1,float16,fp8,15,0.012484800070524216
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,2,128,1,float16,float16,31,0.0235615998506546
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,2,128,1,float16,float16,63,0.023603199422359465
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,2,128,1,float16,fp8,63,0.012540799379348756
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,2,128,1,float16,float16,127,0.02388319969177246
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,2,128,1,float16,fp8,127,0.012614400684833526
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,2,128,1,float16,float16,255,0.024825599789619446
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,2,128,1,float16,fp8,1023,0.031012800335884095
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,4,128,1,float16,float16,1,0.026873600482940675
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,2,128,1,float16,fp8,255,0.012468799948692322
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,2,128,1,float16,float16,511,0.04342719912528992
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,2,128,1,float16,fp8,511,0.01675360053777695
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,2,128,1,float16,float16,1023,0.06167839765548706
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,4,128,1,float16,fp8,1,0.014900800585746766
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,4,128,1,float16,float16,3,0.026851201057434083
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,4,128,1,float16,fp8,3,0.014616000652313232
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,4,128,1,float16,float16,7,0.0268640011548996
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,4,128,1,float16,fp8,31,0.01510079950094223
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,4,128,1,float16,fp8,7,0.01462240070104599
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,4,128,1,float16,float16,15,0.02688960134983063
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,4,128,1,float16,fp8,15,0.014699199795722961
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,4,128,1,float16,float16,31,0.027009600400924684
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,4,128,1,float16,float16,255,0.04272319972515106
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,4,128,1,float16,float16,63,0.026867198944091796
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,4,128,1,float16,fp8,63,0.014591999351978302
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,4,128,1,float16,float16,127,0.02932479977607727
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,4,128,1,float16,fp8,127,0.014632000029087067
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,4,128,1,float16,fp8,255,0.015134400129318238
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,4,128,1,float16,float16,511,0.06149759888648987
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,4,128,1,float16,fp8,511,0.028780800104141236
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,4,128,1,float16,fp8,1023,0.04916000068187713
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,4,128,1,float16,float16,1023,0.09919040203094483
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,8,128,1,float16,float16,15,0.03712640106678009
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,8,128,1,float16,float16,1,0.03716799914836884
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,8,128,1,float16,fp8,1,0.02279839962720871
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,8,128,1,float16,float16,3,0.037118399143218996
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,8,128,1,float16,fp8,3,0.022776000201702118
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,8,128,1,float16,float16,7,0.037041598558425905
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,8,128,1,float16,fp8,7,0.022784000635147093
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,8,128,1,float16,fp8,15,0.022907200455665588
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,8,128,1,float16,float16,31,0.037067198753356935
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,8,128,1,float16,fp8,31,0.02271360009908676
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,8,128,1,float16,float16,63,0.03934719860553741
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,8,128,1,float16,fp8,63,0.0228752002120018
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,8,128,1,float16,float16,127,0.044407999515533446
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,8,128,1,float16,fp8,127,0.022977599501609804
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,8,128,1,float16,float16,255,0.06201120018959046
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,8,128,1,float16,fp8,1023,0.08652799725532531
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,8,128,1,float16,fp8,255,0.0294048011302948
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,8,128,1,float16,float16,511,0.09836800098419189
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,8,128,1,float16,fp8,511,0.048286399245262145
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,32,8,128,1,float16,float16,1023,0.17120959758758544
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,2,128,1,float16,float16,1,0.030929601192474364
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,2,128,1,float16,fp8,1,0.01748799979686737
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,2,128,1,float16,fp8,3,0.016972799599170686
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,2,128,1,float16,fp8,7,0.01746399998664856
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,2,128,1,float16,float16,7,0.031038400530815125
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,2,128,1,float16,float16,3,0.031302401423454286
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,2,128,1,float16,float16,15,0.030964800715446474
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,2,128,1,float16,fp8,15,0.01671839952468872
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,2,128,1,float16,float16,31,0.031011199951171874
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,2,128,1,float16,fp8,31,0.017726400494575502
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,2,128,1,float16,float16,63,0.03265439867973328
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,2,128,1,float16,fp8,63,0.017564800381660462
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,2,128,1,float16,float16,127,0.033004799485206605
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,2,128,1,float16,fp8,127,0.017744000256061553
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,2,128,1,float16,float16,255,0.04529759883880615
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,2,128,1,float16,fp8,255,0.016686399281024934
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,2,128,1,float16,float16,511,0.06488159894943238
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,2,128,1,float16,fp8,511,0.030239999294281006
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,4,128,1,float16,float16,1,0.03744960129261017
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,4,128,1,float16,fp8,1,0.023766399919986726
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,4,128,1,float16,float16,3,0.037136000394821164
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,4,128,1,float16,fp8,3,0.024508799612522125
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,4,128,1,float16,float16,7,0.03747679889202118
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,4,128,1,float16,fp8,7,0.024432000517845155
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,4,128,1,float16,float16,15,0.03887520134449005
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,4,128,1,float16,fp8,15,0.022991999983787537
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,4,128,1,float16,float16,31,0.038011199235916136
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,4,128,1,float16,fp8,31,0.02322240024805069
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,4,128,1,float16,float16,255,0.06362879872322083
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,4,128,1,float16,float16,63,0.03920480012893677
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,4,128,1,float16,fp8,63,0.02332320064306259
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,4,128,1,float16,float16,127,0.04724799990653992
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,4,128,1,float16,fp8,127,0.023601600527763368
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,8,128,1,float16,fp8,1,0.039233601093292235
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,4,128,1,float16,fp8,255,0.029867199063301087
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,4,128,1,float16,float16,511,0.10252959728240967
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,4,128,1,float16,fp8,511,0.04891839921474457
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,8,128,1,float16,float16,1,0.05760319828987122
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,8,128,1,float16,fp8,15,0.03911519944667816
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,8,128,1,float16,float16,3,0.05763520002365112
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,8,128,1,float16,fp8,3,0.03924799859523773
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,8,128,1,float16,float16,7,0.05758399963378906
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,8,128,1,float16,fp8,7,0.039073601365089417
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,8,128,1,float16,float16,15,0.05766400098800659
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,8,128,1,float16,float16,127,0.06831679940223694
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,8,128,1,float16,float16,31,0.06149439811706543
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,8,128,1,float16,fp8,31,0.039175999164581296
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,8,128,1,float16,float16,63,0.06393920183181763
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,8,128,1,float16,fp8,63,0.039084801077842714
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,8,128,1,float16,fp8,127,0.041099199652671815
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,8,128,1,float16,float16,255,0.1024783968925476
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,8,128,1,float16,float16,511,0.1743199944496155
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,8,128,1,float16,fp8,511,0.08660799860954285
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,32,8,128,1,float16,fp8,255,0.05154719948768616
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,2,128,1,float16,float16,3,0.01706400066614151
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,2,128,1,float16,float16,15,0.019523200392723084
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,2,128,1,float16,float16,7,0.018692800402641298
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,2,128,1,float16,float16,1,0.018592000007629395
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,2,128,1,float16,fp8,7,0.010412800312042236
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,2,128,1,float16,fp8,15,0.010388799756765366
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,2,128,1,float16,float16,31,0.018139199912548067
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,2,128,1,float16,fp8,127,0.010419200360774993
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,2,128,1,float16,float16,255,0.01857600063085556
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,2,128,1,float16,fp8,255,0.010406400263309478
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,2,128,1,float16,fp8,3,0.010424000024795533
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,2,128,1,float16,fp8,1,0.01048479974269867
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,2,128,1,float16,float16,1023,0.018904000520706177
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,2,128,1,float16,fp8,31,0.010363200306892395
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,2,128,1,float16,float16,63,0.018596799671649934
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,2,128,1,float16,fp8,63,0.009465599805116654
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,2,128,1,float16,float16,127,0.018694399297237395
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,2,128,1,float16,float16,511,0.01871519982814789
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,2,128,1,float16,fp8,511,0.01040000021457672
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,2,128,1,float16,fp8,1023,0.010507199913263321
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,2,128,1,float16,float16,2047,0.020745599269866945
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,2,128,1,float16,fp8,2047,0.010791999846696853
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,2,128,1,float16,float16,4095,0.022763200104236603
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,2,128,1,float16,fp8,4095,0.012492799758911132
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,2,128,1,float16,float16,8191,0.024868799746036528
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,2,128,1,float16,fp8,8191,0.01451359987258911
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,4,128,1,float16,float16,1,0.01661760061979294
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,4,128,1,float16,fp8,1,0.008484800159931184
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,4,128,1,float16,float16,3,0.016710400581359863
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,4,128,1,float16,fp8,3,0.008448000252246856
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,4,128,1,float16,float16,7,0.016663999855518342
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,4,128,1,float16,fp8,7,0.008376000076532364
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,4,128,1,float16,float16,15,0.016748799383640288
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,4,128,1,float16,fp8,127,0.008532799780368805
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,4,128,1,float16,fp8,15,0.008364800363779068
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,4,128,1,float16,float16,31,0.016675199568271636
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,4,128,1,float16,fp8,31,0.008470399677753449
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,4,128,1,float16,float16,63,0.01650879979133606
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,4,128,1,float16,fp8,511,0.0083856001496315
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,4,128,1,float16,fp8,63,0.008444800227880477
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,4,128,1,float16,float16,127,0.016752000153064727
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,4,128,1,float16,float16,255,0.01656640022993088
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,4,128,1,float16,fp8,255,0.008379200100898742
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,4,128,1,float16,float16,511,0.016760000586509706
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,4,128,1,float16,float16,1023,0.017148800194263458
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,4,128,1,float16,fp8,1023,0.008537600189447403
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,4,128,1,float16,float16,2047,0.01874080002307892
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,4,128,1,float16,fp8,2047,0.010414399951696397
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,4,128,1,float16,float16,4095,0.020868800580501556
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,4,128,1,float16,fp8,4095,0.0105103999376297
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,4,128,1,float16,float16,8191,0.04126720130443573
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,4,128,1,float16,fp8,8191,0.014499199390411378
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,8,128,1,float16,float16,1,0.017608000338077544
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,8,128,1,float16,fp8,1,0.008396799862384795
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,8,128,1,float16,float16,3,0.018612800538539885
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,8,128,1,float16,fp8,3,0.00833280012011528
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,8,128,1,float16,float16,7,0.018415999412536622
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,8,128,1,float16,fp8,7,0.008367999643087386
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,8,128,1,float16,float16,15,0.018654400110244752
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,8,128,1,float16,fp8,15,0.008451200276613235
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,8,128,1,float16,float16,31,0.018665599822998046
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,8,128,1,float16,fp8,31,0.008392000198364257
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,8,128,1,float16,float16,63,0.018638400733470915
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,8,128,1,float16,float16,511,0.018588800728321076
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,8,128,1,float16,fp8,63,0.008455999940633774
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,8,128,1,float16,float16,127,0.016680000722408293
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,8,128,1,float16,fp8,127,0.008355200290679932
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,8,128,1,float16,float16,255,0.016638399660587312
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,8,128,1,float16,fp8,255,0.00843520015478134
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,8,128,1,float16,float16,1023,0.018775999546051025
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,8,128,1,float16,fp8,511,0.008377599716186523
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,8,128,1,float16,fp8,4095,0.014161600172519684
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,8,128,1,float16,fp8,1023,0.009681600332260131
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,8,128,1,float16,float16,2047,0.020721599459648132
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,8,128,1,float16,fp8,2047,0.010582400113344192
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,8,128,1,float16,float16,4095,0.041257598996162416
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,8,128,1,float16,float16,8191,0.05957279801368713
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,32,8,128,1,float16,fp8,8191,0.028934401273727418
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,32,2,128,1,float16,float16,1,0.04436799883842468
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,32,2,128,1,float16,fp8,1,0.02880159914493561
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,32,2,128,1,float16,fp8,3,0.02855199873447418
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,32,2,128,1,float16,float16,7,0.044407999515533446
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,32,2,128,1,float16,float16,3,0.04429920017719269
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,32,2,128,1,float16,fp8,63,0.02894560098648071
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,32,2,128,1,float16,fp8,7,0.02850399911403656
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,32,2,128,1,float16,float16,15,0.04426400065422058
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,32,2,128,1,float16,fp8,15,0.028838399052619933
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,32,2,128,1,float16,float16,255,0.06574400067329407
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,32,2,128,1,float16,float16,31,0.04356000125408173
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,32,2,128,1,float16,fp8,31,0.028515198826789857
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,32,2,128,1,float16,float16,63,0.04949600100517273
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,32,2,128,1,float16,float16,127,0.05124160051345825
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,32,2,128,1,float16,fp8,127,0.0289247989654541
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,32,2,128,1,float16,fp8,255,0.031948798894882204
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,32,4,128,1,float16,float16,1,0.05761600136756897
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,32,4,128,1,float16,fp8,1,0.0408048003911972
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,32,4,128,1,float16,float16,3,0.057529598474502563
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,32,4,128,1,float16,fp8,3,0.04040000140666962
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,32,4,128,1,float16,float16,7,0.05761439800262451
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,32,4,128,1,float16,fp8,7,0.041142401099205014
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,32,4,128,1,float16,float16,15,0.05831999778747558
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,32,4,128,1,float16,fp8,15,0.040720000863075256
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,32,4,128,1,float16,float16,31,0.06343839764595031
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,32,4,128,1,float16,float16,255,0.10330400466918946
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,32,4,128,1,float16,fp8,31,0.040987199544906615
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,32,4,128,1,float16,float16,63,0.06456159949302673
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,32,4,128,1,float16,fp8,63,0.040561598539352414
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,32,8,128,1,float16,float16,3,0.10013760328292846
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,32,4,128,1,float16,float16,127,0.06878719925880432
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,32,4,128,1,float16,fp8,127,0.04209760129451752
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,32,4,128,1,float16,fp8,255,0.05156800150871277
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,32,8,128,1,float16,float16,1,0.09951840043067932
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,32,8,128,1,float16,fp8,1,0.07184159755706787
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,32,8,128,1,float16,fp8,3,0.07191359996795654
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,32,8,128,1,float16,float16,7,0.10065599679946899
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,32,8,128,1,float16,fp8,7,0.07198399901390076
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,32,8,128,1,float16,float16,15,0.10825599431991577
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,32,8,128,1,float16,fp8,15,0.0719760000705719
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,32,8,128,1,float16,float16,31,0.10879839658737182
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,32,8,128,1,float16,fp8,31,0.07196639776229859
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,32,8,128,1,float16,float16,63,0.10887839794158935
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,32,8,128,1,float16,fp8,255,0.09320639967918395
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,32,8,128,1,float16,fp8,63,0.07192639708518982
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,32,8,128,1,float16,float16,127,0.11653120517730713
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,32,8,128,1,float16,fp8,127,0.07384639978408813
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,32,8,128,1,float16,float16,255,0.18415839672088624
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,32,2,128,1,float16,float16,7,0.06876320242881775
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,32,2,128,1,float16,fp8,7,0.04946720004081726
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,32,2,128,1,float16,float16,1,0.06993439793586731
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,32,2,128,1,float16,fp8,1,0.049384000897407535
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,32,2,128,1,float16,fp8,3,0.049344000220298764
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,32,2,128,1,float16,float16,3,0.06897600293159485
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,32,2,128,1,float16,fp8,15,0.04930880069732666
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,32,2,128,1,float16,float16,15,0.07429119944572449
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,32,2,128,1,float16,float16,31,0.0761135995388031
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,32,2,128,1,float16,fp8,31,0.04938080012798309
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,32,2,128,1,float16,float16,63,0.07602400183677674
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,32,2,128,1,float16,fp8,63,0.04982239902019501
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,32,4,128,1,float16,float16,3,0.10002399682998657
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,32,2,128,1,float16,float16,127,0.07635200023651123
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,32,2,128,1,float16,fp8,127,0.050684797763824466
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,32,4,128,1,float16,float16,1,0.09968000054359435
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,32,4,128,1,float16,fp8,1,0.07406560182571412
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,32,4,128,1,float16,fp8,3,0.07388479709625244
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,32,4,128,1,float16,float16,7,0.10675359964370727
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,32,4,128,1,float16,fp8,7,0.07401599884033203
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,32,4,128,1,float16,float16,15,0.108024001121521
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,32,4,128,1,float16,fp8,15,0.07390080094337463
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,32,4,128,1,float16,fp8,63,0.07503359913825988
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,32,4,128,1,float16,float16,31,0.10835360288619995
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,32,4,128,1,float16,fp8,31,0.07434560060501098
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,32,4,128,1,float16,float16,63,0.10919680595397949
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,32,4,128,1,float16,float16,127,0.11769759654998779
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,32,8,128,1,float16,fp8,3,0.13547680377960206
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,32,4,128,1,float16,fp8,127,0.07616479992866516
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,32,8,128,1,float16,float16,1,0.18163520097732544
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,32,8,128,1,float16,fp8,1,0.13540639877319335
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,32,8,128,1,float16,float16,3,0.18857760429382325
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,32,8,128,1,float16,float16,7,0.1918287992477417
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,32,8,128,1,float16,fp8,7,0.13562400341033937
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,32,8,128,1,float16,float16,15,0.19212000370025634
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,32,8,128,1,float16,fp8,15,0.13544479608535767
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,32,8,128,1,float16,float16,31,0.1927008032798767
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,32,8,128,1,float16,fp8,31,0.1356112003326416
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,32,8,128,1,float16,float16,63,0.19194719791412354
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,32,8,128,1,float16,fp8,63,0.1374608039855957
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,32,8,128,1,float16,fp8,127,0.13756959438323973
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,32,8,128,1,float16,float16,127,0.20616960525512695
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,2,128,1,float16,fp8,3,0.008630400151014328
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,2,128,1,float16,fp8,1,0.008478400111198426
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,2,128,1,float16,float16,31,0.016676799952983858
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,2,128,1,float16,float16,1,0.016769599914550782
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,2,128,1,float16,fp8,31,0.008376000076532364
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,2,128,1,float16,fp8,7,0.008856000006198883
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,2,128,1,float16,float16,7,0.016812799870967864
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,2,128,1,float16,float16,3,0.017548799514770508
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,2,128,1,float16,float16,15,0.01868640035390854
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,2,128,1,float16,fp8,15,0.008875200152397155
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,2,128,1,float16,float16,63,0.01660960018634796
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,2,128,1,float16,fp8,63,0.008556800335645676
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,2,128,1,float16,float16,127,0.018568000197410582
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,2,128,1,float16,fp8,127,0.008422400057315826
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,2,128,1,float16,float16,255,0.01671680063009262
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,2,128,1,float16,fp8,255,0.00856959968805313
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,2,128,1,float16,float16,511,0.017083199322223665
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,2,128,1,float16,fp8,511,0.008451200276613235
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,2,128,1,float16,float16,1023,0.017454400658607483
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,2,128,1,float16,fp8,1023,0.009107200056314468
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,2,128,1,float16,float16,2047,0.019699199497699736
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,2,128,1,float16,fp8,2047,0.010503999888896942
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,2,128,1,float16,float16,4095,0.022388799488544463
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,2,128,1,float16,fp8,4095,0.012531200051307678
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,2,128,1,float16,float16,8191,0.0415583997964859
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,2,128,1,float16,fp8,8191,0.015703999996185304
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,4,128,1,float16,float16,1,0.018544000387191773
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,4,128,1,float16,fp8,1,0.008366400003433227
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,4,128,1,float16,float16,3,0.017236800491809846
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,4,128,1,float16,fp8,3,0.008430399745702744
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,4,128,1,float16,float16,7,0.018606400489807128
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,4,128,1,float16,fp8,7,0.008369600027799606
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,4,128,1,float16,float16,15,0.018592000007629395
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,4,128,1,float16,fp8,15,0.008377599716186523
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,4,128,1,float16,float16,31,0.01852319985628128
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,4,128,1,float16,fp8,31,0.008531200140714646
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,4,128,1,float16,float16,63,0.018705600500106813
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,4,128,1,float16,fp8,63,0.008460800349712371
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,4,128,1,float16,float16,127,0.016672000288963318
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,4,128,1,float16,fp8,127,0.008353599905967712
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,4,128,1,float16,float16,255,0.018598400056362152
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,4,128,1,float16,fp8,255,0.008395200222730636
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,4,128,1,float16,float16,511,0.016804799437522888
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,4,128,1,float16,fp8,511,0.008455999940633774
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,4,128,1,float16,float16,1023,0.018667200207710268
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,4,128,1,float16,fp8,1023,0.008560000360012055
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,4,128,1,float16,float16,2047,0.020852799713611602
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,4,128,1,float16,fp8,2047,0.010503999888896942
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,4,128,1,float16,float16,4095,0.04193759858608246
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,4,128,1,float16,fp8,4095,0.014473600685596466
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,8,128,1,float16,float16,7,0.016867199540138246
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,4,128,1,float16,float16,8191,0.05958240032196045
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,4,128,1,float16,fp8,8191,0.028999999165534973
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,8,128,1,float16,float16,1,0.016572800278663636
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,8,128,1,float16,fp8,1,0.008374399691820144
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,8,128,1,float16,float16,3,0.016630400717258454
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,8,128,1,float16,fp8,3,0.00836160033941269
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,8,128,1,float16,fp8,63,0.008367999643087386
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,8,128,1,float16,fp8,7,0.008336000144481659
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,8,128,1,float16,float16,15,0.016596800088882445
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,8,128,1,float16,fp8,15,0.008417599648237229
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,8,128,1,float16,float16,31,0.016836799681186676
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,8,128,1,float16,float16,511,0.0186831995844841
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,8,128,1,float16,fp8,31,0.00833439975976944
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,8,128,1,float16,float16,63,0.01664000004529953
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,8,128,1,float16,fp8,127,0.0083856001496315
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,8,128,1,float16,float16,127,0.01706240028142929
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,8,128,1,float16,float16,255,0.017315199971199034
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,8,128,1,float16,fp8,255,0.008379200100898742
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,8,128,1,float16,fp8,511,0.00843999981880188
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,8,128,1,float16,float16,1023,0.020843200385570526
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,8,128,1,float16,fp8,1023,0.010407999902963639
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,8,128,1,float16,fp8,8191,0.04637919962406158
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,8,128,1,float16,float16,2047,0.04080959856510162
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,8,128,1,float16,fp8,2047,0.01255200058221817
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,8,128,1,float16,float16,4095,0.058950400352478026
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,8,128,1,float16,fp8,4095,0.027697598934173583
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,32,8,128,1,float16,float16,8191,0.09549919962882995
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,1,128,1,float16,float16,15,0.018665599822998046
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,1,128,1,float16,float16,1,0.018646399676799773
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,1,128,1,float16,fp8,1,0.00974079966545105
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,1,128,1,float16,float16,3,0.01857600063085556
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,1,128,1,float16,fp8,3,0.008555199950933456
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,1,128,1,float16,fp8,63,0.010273600369691849
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,1,128,1,float16,float16,7,0.018598400056362152
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,1,128,1,float16,fp8,7,0.008743999898433686
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,1,128,1,float16,fp8,15,0.01021760031580925
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,1,128,1,float16,float16,31,0.018700799345970152
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,1,128,1,float16,fp8,31,0.008700799942016602
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,1,128,1,float16,float16,63,0.01875839978456497
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,1,128,1,float16,float16,127,0.018622399866580965
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,1,128,1,float16,fp8,127,0.010208000242710114
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,1,128,1,float16,float16,255,0.01858240067958832
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,1,128,1,float16,fp8,255,0.009059199690818786
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,1,128,1,float16,float16,511,0.018598400056362152
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,1,128,1,float16,float16,4095,0.0412880003452301
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,1,128,1,float16,fp8,511,0.0088639996945858
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,1,128,1,float16,float16,1023,0.01870719939470291
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,1,128,1,float16,fp8,1023,0.010438399761915207
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,1,128,1,float16,float16,2047,0.0213359996676445
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,1,128,1,float16,fp8,2047,0.012425599992275238
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,1,128,1,float16,fp8,4095,0.015204800665378571
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,1,128,1,float16,float16,8191,0.05957440137863159
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,1,128,1,float16,fp8,8191,0.02909280061721802
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,1,128,1,float16,float16,16383,0.09642400145530701
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,2,128,1,float16,float16,1,0.016571199893951415
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,1,128,1,float16,fp8,16383,0.04748159945011139
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,2,128,1,float16,fp8,1,0.00836160033941269
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,2,128,1,float16,float16,3,0.016689600050449373
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,2,128,1,float16,fp8,3,0.008369600027799606
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,2,128,1,float16,float16,7,0.016553600132465363
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,2,128,1,float16,fp8,7,0.008299200236797333
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,2,128,1,float16,float16,15,0.016652800142765045
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,2,128,1,float16,fp8,15,0.00835679993033409
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,2,128,1,float16,float16,31,0.016596800088882445
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,2,128,1,float16,fp8,31,0.008327999711036682
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,2,128,1,float16,float16,63,0.016633599996566772
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,2,128,1,float16,fp8,63,0.008355200290679932
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,2,128,1,float16,float16,127,0.017548799514770508
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,2,128,1,float16,fp8,127,0.00830880030989647
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,2,128,1,float16,float16,255,0.0165583997964859
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,2,128,1,float16,fp8,255,0.008382400125265121
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,2,128,1,float16,float16,511,0.018673600256443025
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,2,128,1,float16,fp8,511,0.008369600027799606
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,2,128,1,float16,float16,1023,0.02066880017518997
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,2,128,1,float16,fp8,1023,0.010438399761915207
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,2,128,1,float16,float16,2047,0.0411215990781784
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,2,128,1,float16,fp8,2047,0.012544000148773193
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,2,128,1,float16,float16,4095,0.05909280180931091
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,2,128,1,float16,fp8,16383,0.08340960144996643
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,2,128,1,float16,fp8,4095,0.028086400032043456
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,4,128,1,float16,fp8,1,0.00865119993686676
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,2,128,1,float16,float16,8191,0.09492959976196289
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,2,128,1,float16,fp8,8191,0.046305599808692935
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,2,128,1,float16,float16,16383,0.16703039407730103
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,4,128,1,float16,float16,1,0.01857759952545166
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,4,128,1,float16,float16,3,0.01857919991016388
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,4,128,1,float16,float16,31,0.018719999492168425
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,4,128,1,float16,fp8,3,0.00857120007276535
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,4,128,1,float16,float16,7,0.01865919977426529
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,4,128,1,float16,fp8,7,0.008416000008583068
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,4,128,1,float16,float16,15,0.01860959976911545
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,4,128,1,float16,fp8,15,0.008644799888134002
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,4,128,1,float16,fp8,31,0.008479999750852585
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,4,128,1,float16,float16,63,0.01871200054883957
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,4,128,1,float16,fp8,63,0.008556800335645676
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,4,128,1,float16,float16,127,0.018648000061511995
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,4,128,1,float16,fp8,127,0.00843520015478134
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,4,128,1,float16,float16,255,0.01863359957933426
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,4,128,1,float16,fp8,255,0.008376000076532364
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,4,128,1,float16,float16,511,0.0206496000289917
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,4,128,1,float16,fp8,511,0.010465600341558457
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,4,128,1,float16,float16,1023,0.0409168004989624
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,4,128,1,float16,fp8,1023,0.013699199259281158
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,4,128,1,float16,fp8,2047,0.028811201453208923
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,4,128,1,float16,float16,2047,0.05985119938850403
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,4,128,1,float16,float16,4095,0.09617120027542114
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,4,128,1,float16,fp8,4095,0.047163200378417966
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,4,128,1,float16,fp8,8191,0.0843455970287323
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,4,128,1,float16,float16,8191,0.1690719962120056
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,8,128,1,float16,float16,1,0.021456000208854676
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,8,128,1,float16,fp8,1,0.010582400113344192
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,4,128,1,float16,float16,16383,0.3176000118255615
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,4,128,1,float16,fp8,16383,0.15938080549240113
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,8,128,1,float16,float16,3,0.021104000508785248
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,8,128,1,float16,fp8,3,0.010425599664449692
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,8,128,1,float16,float16,7,0.021240000426769257
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,8,128,1,float16,fp8,7,0.010713600367307664
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,8,128,1,float16,float16,15,0.02072799950838089
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,8,128,1,float16,fp8,15,0.01079839989542961
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,8,128,1,float16,float16,31,0.02128800004720688
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,8,128,1,float16,fp8,31,0.010497599840164185
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,8,128,1,float16,float16,63,0.020740799605846405
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,8,128,1,float16,fp8,63,0.010475199669599533
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,8,128,1,float16,float16,127,0.021823999285697938
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,8,128,1,float16,fp8,127,0.010580799728631973
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,8,128,1,float16,float16,255,0.020703999698162077
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,8,128,1,float16,fp8,255,0.010619200021028518
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,8,128,1,float16,float16,511,0.04120799899101257
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,8,128,1,float16,fp8,511,0.014697599411010741
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,8,128,1,float16,float16,1023,0.059648001194000246
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,8,128,1,float16,fp8,1023,0.02882240116596222
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,8,128,1,float16,float16,2047,0.09682719707489014
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,8,128,1,float16,fp8,2047,0.04798240065574646
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,8,128,1,float16,float16,4095,0.1707039952278137
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,8,128,1,float16,fp8,4095,0.08709440231323243
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,8,128,1,float16,float16,8191,0.32330880165100095
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,1,128,1,float16,float16,1,0.016564799845218657
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,8,128,1,float16,fp8,8191,0.16504960060119628
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,1,128,1,float16,fp8,1,0.0104592002928257
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,1,128,1,float16,float16,3,0.016598400473594666
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,1,128,1,float16,fp8,3,0.008991999924182892
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,1,128,1,float16,float16,7,0.014494399726390838
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,8,128,1,float16,fp8,16383,0.32149438858032225
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,16,8,128,1,float16,float16,16383,0.6195951938629151
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,1,128,1,float16,fp8,7,0.009601599723100662
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,1,128,1,float16,float16,15,0.014593599736690522
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,1,128,1,float16,fp8,15,0.010412800312042236
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,1,128,1,float16,float16,31,0.016487999260425566
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,1,128,1,float16,fp8,31,0.008432000130414962
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,1,128,1,float16,float16,63,0.014528000354766845
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,1,128,1,float16,fp8,63,0.009481599926948548
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,1,128,1,float16,fp8,127,0.009548799693584442
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,1,128,1,float16,float16,127,0.01456640064716339
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,1,128,1,float16,float16,255,0.014606399834156037
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,1,128,1,float16,fp8,255,0.010395199805498124
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,1,128,1,float16,float16,511,0.01658879965543747
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,1,128,1,float16,fp8,511,0.008524800091981888
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,1,128,1,float16,float16,1023,0.01459999978542328
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,1,128,1,float16,fp8,1023,0.010377600044012069
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,1,128,1,float16,float16,2047,0.016603200137615202
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,1,128,1,float16,fp8,2047,0.01050880029797554
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,1,128,1,float16,float16,4095,0.01666879951953888
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,1,128,1,float16,fp8,4095,0.010564800351858139
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,1,128,1,float16,float16,8191,0.020614400506019592
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,1,128,1,float16,fp8,8191,0.012639999389648438
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,1,128,1,float16,float16,16383,0.02258400022983551
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,1,128,1,float16,fp8,16383,0.01666080057621002
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,2,128,1,float16,float16,1,0.01658080071210861
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,2,128,1,float16,fp8,1,0.00835999995470047
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,2,128,1,float16,float16,3,0.015587200224399567
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,2,128,1,float16,float16,31,0.014563199877738953
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,2,128,1,float16,fp8,31,0.008339200168848038
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,2,128,1,float16,fp8,3,0.008524800091981888
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,2,128,1,float16,fp8,63,0.00841279998421669
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,2,128,1,float16,float16,7,0.014591999351978302
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,2,128,1,float16,fp8,7,0.00833439975976944
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,2,128,1,float16,float16,15,0.014724799990653991
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,2,128,1,float16,fp8,15,0.00835999995470047
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,2,128,1,float16,float16,63,0.01467680037021637
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,2,128,1,float16,float16,127,0.01650879979133606
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,2,128,1,float16,fp8,127,0.008313599973917007
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,2,128,1,float16,float16,255,0.014611199498176575
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,2,128,1,float16,fp8,255,0.008337599784135818
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,2,128,1,float16,float16,511,0.014611199498176575
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,2,128,1,float16,float16,4095,0.016524800658226015
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,2,128,1,float16,fp8,4095,0.010424000024795533
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,2,128,1,float16,fp8,511,0.008449599891901017
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,2,128,1,float16,float16,1023,0.014608000218868256
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,2,128,1,float16,fp8,1023,0.008363199979066848
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,2,128,1,float16,float16,2047,0.016545599699020384
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,2,128,1,float16,fp8,2047,0.008951999992132188
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,2,128,1,float16,float16,8191,0.018681600689888
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,2,128,1,float16,fp8,8191,0.012489599734544754
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,2,128,1,float16,float16,16383,0.02476000040769577
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,2,128,1,float16,fp8,16383,0.01666239947080612
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,4,128,1,float16,float16,1,0.018641600012779237
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,4,128,1,float16,fp8,1,0.008417599648237229
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,4,128,1,float16,float16,3,0.016667200624942778
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,4,128,1,float16,fp8,3,0.008470399677753449
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,4,128,1,float16,float16,7,0.016612799465656282
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,4,128,1,float16,fp8,7,0.008535999804735184
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,4,128,1,float16,float16,127,0.016739200055599212
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,4,128,1,float16,float16,15,0.018580800294876097
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,4,128,1,float16,fp8,15,0.008475200086832047
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,4,128,1,float16,float16,31,0.01857759952545166
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,4,128,1,float16,fp8,31,0.008404800295829773
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,4,128,1,float16,float16,63,0.01666879951953888
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,4,128,1,float16,fp8,63,0.008470399677753449
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,4,128,1,float16,fp8,127,0.008659200370311737
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,4,128,1,float16,float16,255,0.016579200327396394
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,4,128,1,float16,fp8,255,0.008568000048398972
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,4,128,1,float16,float16,511,0.016627199947834015
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,4,128,1,float16,fp8,511,0.008607999980449676
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,4,128,1,float16,float16,1023,0.018638400733470915
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,4,128,1,float16,fp8,8191,0.012598399817943574
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,4,128,1,float16,fp8,1023,0.008584000170230865
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,4,128,1,float16,float16,2047,0.01857759952545166
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,4,128,1,float16,fp8,2047,0.010422399640083313
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,4,128,1,float16,float16,4095,0.018880000710487364
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,4,128,1,float16,fp8,4095,0.010652799904346467
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,4,128,1,float16,float16,8191,0.022463999688625336
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,4,128,1,float16,fp8,16383,0.01446560025215149
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,4,128,1,float16,float16,16383,0.024907200038433074
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,8,128,1,float16,float16,1,0.016652800142765045
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,8,128,1,float16,fp8,1,0.008430399745702744
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,8,128,1,float16,float16,31,0.016756799817085267
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,8,128,1,float16,float16,3,0.018628799915313722
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,8,128,1,float16,fp8,3,0.008476799726486206
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,8,128,1,float16,float16,7,0.018620799481868743
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,8,128,1,float16,fp8,7,0.008446399867534638
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,8,128,1,float16,float16,15,0.018747200071811677
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,8,128,1,float16,fp8,15,0.008633600175380706
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,8,128,1,float16,fp8,31,0.008303999900817871
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,8,128,1,float16,float16,63,0.01716960072517395
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,8,128,1,float16,fp8,63,0.008459199965000153
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,8,128,1,float16,float16,127,0.01660960018634796
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,8,128,1,float16,fp8,127,0.008388800173997879
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,8,128,1,float16,float16,255,0.016630400717258454
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,8,128,1,float16,fp8,255,0.008340799808502197
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,8,128,1,float16,float16,511,0.018636800348758698
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,8,128,1,float16,fp8,511,0.00856959968805313
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,8,128,1,float16,float16,1023,0.01698880046606064
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,8,128,1,float16,fp8,1023,0.008460800349712371
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,8,128,1,float16,float16,2047,0.01852159947156906
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,8,128,1,float16,fp8,2047,0.010499200224876404
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,8,128,1,float16,float16,4095,0.018670399487018586
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,8,128,1,float16,fp8,4095,0.01048320010304451
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,8,128,1,float16,float16,8191,0.022859199345111846
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,8,128,1,float16,fp8,8191,0.012455999851226807
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,1,128,1,float16,float16,7,0.01664479970932007
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,8,128,1,float16,float16,16383,0.024732799828052522
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,1,128,1,float16,float16,15,0.01661760061979294
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,16,8,128,1,float16,fp8,16383,0.01276959925889969
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,1,128,1,float16,float16,1,0.014591999351978302
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,1,128,1,float16,fp8,1,0.010100799798965453
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,1,128,1,float16,float16,3,0.01451839953660965
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,1,128,1,float16,fp8,3,0.009443199634552002
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,1,128,1,float16,fp8,7,0.00902400016784668
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,1,128,1,float16,fp8,15,0.008534400165081025
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,1,128,1,float16,float16,31,0.016606399416923524
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,1,128,1,float16,fp8,31,0.009505599737167358
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,1,128,1,float16,float16,63,0.014571200311183929
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,1,128,1,float16,fp8,63,0.00997759997844696
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,1,128,1,float16,float16,127,0.015583999454975128
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,1,128,1,float16,fp8,127,0.009489600360393525
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,1,128,1,float16,float16,255,0.015652799606323244
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,1,128,1,float16,fp8,255,0.008478400111198426
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,1,128,1,float16,float16,511,0.01661919951438904
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,1,128,1,float16,fp8,511,0.008524800091981888
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,1,128,1,float16,float16,1023,0.01661919951438904
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,1,128,1,float16,fp8,1023,0.010316800326108932
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,1,128,1,float16,float16,2047,0.01847680062055588
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,1,128,1,float16,fp8,2047,0.01050880029797554
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,1,128,1,float16,float16,4095,0.018592000007629395
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,1,128,1,float16,fp8,4095,0.011615999788045884
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,1,128,1,float16,float16,8191,0.018713599443435668
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,1,128,1,float16,fp8,8191,0.01297760009765625
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,1,128,1,float16,float16,16383,0.024035200476646423
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,2,128,1,float16,float16,1,0.016748799383640288
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,1,128,1,float16,fp8,16383,0.017348800599575043
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,2,128,1,float16,fp8,1,0.00835679993033409
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,2,128,1,float16,float16,3,0.01858240067958832
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,2,128,1,float16,float16,31,0.01671999990940094
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,2,128,1,float16,float16,7,0.018620799481868743
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,2,128,1,float16,fp8,7,0.00835999995470047
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,2,128,1,float16,fp8,3,0.00835679993033409
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,2,128,1,float16,float16,15,0.01674560010433197
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,2,128,1,float16,fp8,15,0.008352000266313553
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,2,128,1,float16,fp8,31,0.00843840017914772
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,2,128,1,float16,float16,63,0.016729600727558136
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,2,128,1,float16,fp8,63,0.008479999750852585
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,2,128,1,float16,float16,127,0.018585599958896637
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,2,128,1,float16,fp8,127,0.008510400354862214
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,2,128,1,float16,fp8,255,0.00835679993033409
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,2,128,1,float16,float16,511,0.018665599822998046
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,2,128,1,float16,fp8,511,0.00846880003809929
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,2,128,1,float16,float16,255,0.016782400012016297
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,2,128,1,float16,float16,1023,0.018702399730682374
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,2,128,1,float16,fp8,1023,0.008470399677753449
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,2,128,1,float16,float16,2047,0.018564799427986146
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,2,128,1,float16,fp8,2047,0.008580800145864487
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,2,128,1,float16,float16,4095,0.020734399557113647
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,2,128,1,float16,fp8,4095,0.010441599786281586
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,2,128,1,float16,float16,8191,0.02269120067358017
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,2,128,1,float16,fp8,8191,0.012664000689983367
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,2,128,1,float16,float16,16383,0.02492160052061081
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,4,128,1,float16,fp8,7,0.00841120034456253
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,2,128,1,float16,fp8,16383,0.014686399698257446
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,4,128,1,float16,float16,15,0.018670399487018586
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,4,128,1,float16,float16,1,0.016673600673675536
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,4,128,1,float16,fp8,1,0.008420799672603608
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,4,128,1,float16,float16,3,0.018671999871730804
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,4,128,1,float16,fp8,3,0.008379200100898742
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,4,128,1,float16,float16,7,0.01777600049972534
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,4,128,1,float16,fp8,15,0.008459199965000153
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,4,128,1,float16,float16,31,0.017628799378871917
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,4,128,1,float16,fp8,31,0.008377599716186523
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,4,128,1,float16,float16,63,0.016761599481105803
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,4,128,1,float16,fp8,63,0.008352000266313553
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,4,128,1,float16,float16,127,0.018606400489807128
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,4,128,1,float16,fp8,127,0.008352000266313553
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,4,128,1,float16,float16,255,0.018566399812698364
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,4,128,1,float16,fp8,255,0.008446399867534638
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,4,128,1,float16,float16,511,0.017579199373722078
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,4,128,1,float16,fp8,511,0.008481600135564805
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,4,128,1,float16,float16,1023,0.016969600319862367
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,4,128,1,float16,fp8,1023,0.008526399731636047
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,4,128,1,float16,float16,2047,0.018643200397491455
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,4,128,1,float16,fp8,2047,0.010332799702882766
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,4,128,1,float16,float16,4095,0.018777599930763243
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,4,128,1,float16,fp8,4095,0.01055999994277954
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,4,128,1,float16,float16,8191,0.02085919976234436
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,4,128,1,float16,fp8,8191,0.012555199861526489
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,4,128,1,float16,float16,16383,0.022998400032520294
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,4,128,1,float16,fp8,16383,0.014302399754524232
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,8,128,1,float16,float16,1,0.018595199286937713
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,8,128,1,float16,fp8,1,0.008390399813652038
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,8,128,1,float16,float16,31,0.018718400597572328
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,8,128,1,float16,float16,3,0.018595199286937713
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,8,128,1,float16,fp8,3,0.008395200222730636
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,8,128,1,float16,float16,7,0.017667199671268462
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,8,128,1,float16,fp8,7,0.008473599702119828
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,8,128,1,float16,float16,15,0.016788800060749055
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,8,128,1,float16,fp8,15,0.008463999629020691
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,8,128,1,float16,fp8,31,0.008364800363779068
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,8,128,1,float16,float16,63,0.01865759938955307
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,8,128,1,float16,fp8,63,0.008566399663686752
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,8,128,1,float16,float16,127,0.016599999368190767
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,8,128,1,float16,fp8,127,0.008604799956083297
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,8,128,1,float16,float16,255,0.018748800456523895
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,8,128,1,float16,fp8,255,0.008449599891901017
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,8,128,1,float16,float16,511,0.018596799671649934
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,8,128,1,float16,fp8,511,0.008446399867534638
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,8,128,1,float16,float16,1023,0.018648000061511995
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,8,128,1,float16,fp8,1023,0.008558399975299835
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,8,128,1,float16,float16,2047,0.01870879977941513
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,8,128,1,float16,fp8,2047,0.010492800176143647
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,8,128,1,float16,float16,4095,0.02088479995727539
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,8,128,1,float16,fp8,4095,0.010497599840164185
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,8,128,1,float16,float16,8191,0.02500480115413666
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,8,128,1,float16,fp8,8191,0.01257600039243698
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,8,128,1,float16,float16,16383,0.043952000141143796
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,16,8,128,1,float16,fp8,16383,0.014696000516414643
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,1,128,1,float16,fp8,7,0.008478400111198426
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,1,128,1,float16,float16,1,0.0169295996427536
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,1,128,1,float16,fp8,1,0.008886399865150451
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,1,128,1,float16,float16,3,0.018603199720382692
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,1,128,1,float16,fp8,3,0.00846560001373291
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,1,128,1,float16,float16,7,0.018592000007629395
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,1,128,1,float16,float16,15,0.016659200191497803
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,1,128,1,float16,fp8,15,0.008408000320196151
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,1,128,1,float16,float16,31,0.01675039976835251
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,1,128,1,float16,fp8,31,0.009296000003814697
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,1,128,1,float16,float16,63,0.018716800212860107
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,1,128,1,float16,fp8,63,0.008499199897050858
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,1,128,1,float16,float16,127,0.01723040044307709
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,1,128,1,float16,fp8,127,0.008523199707269669
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,1,128,1,float16,float16,255,0.018561600148677825
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,1,128,1,float16,fp8,255,0.008478400111198426
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,1,128,1,float16,float16,511,0.01672160029411316
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,1,128,1,float16,fp8,511,0.008417599648237229
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,1,128,1,float16,float16,1023,0.017803199589252472
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,1,128,1,float16,fp8,1023,0.008532799780368805
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,1,128,1,float16,float16,2047,0.018662400543689728
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,1,128,1,float16,fp8,2047,0.010462400317192078
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,1,128,1,float16,float16,4095,0.020739200711250304
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,2,128,1,float16,fp8,1,0.008432000130414962
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,1,128,1,float16,fp8,4095,0.012464000284671784
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,1,128,1,float16,float16,8191,0.041335999965667725
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,1,128,1,float16,fp8,8191,0.015625600516796113
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,1,128,1,float16,float16,16383,0.05858880281448364
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,2,128,1,float16,float16,1,0.017841599881649017
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,1,128,1,float16,fp8,16383,0.029172798991203307
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,2,128,1,float16,float16,3,0.01693760007619858
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,2,128,1,float16,fp8,3,0.008382400125265121
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,2,128,1,float16,fp8,31,0.008561599999666214
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,2,128,1,float16,float16,31,0.018510399758815764
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,2,128,1,float16,float16,7,0.017088000476360322
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,2,128,1,float16,float16,15,0.017044800519943237
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,2,128,1,float16,fp8,7,0.008350399881601333
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,2,128,1,float16,fp8,15,0.008342400193214417
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,2,128,1,float16,float16,63,0.017025600373744964
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,2,128,1,float16,fp8,63,0.008326400071382523
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,2,128,1,float16,float16,127,0.018622399866580965
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,2,128,1,float16,fp8,127,0.008446399867534638
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,2,128,1,float16,float16,255,0.017350399494171144
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,2,128,1,float16,float16,511,0.017691199481487275
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,2,128,1,float16,fp8,511,0.008417599648237229
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,2,128,1,float16,float16,1023,0.01866080015897751
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,2,128,1,float16,fp8,255,0.0083856001496315
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,2,128,1,float16,fp8,1023,0.008752000331878663
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,2,128,1,float16,float16,2047,0.020766399800777435
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,2,128,1,float16,fp8,2047,0.010505600273609162
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,2,128,1,float16,fp8,16383,0.04691999852657318
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,2,128,1,float16,float16,4095,0.04110400080680847
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,2,128,1,float16,fp8,4095,0.013372799754142762
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,4,128,1,float16,fp8,3,0.008388800173997879
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,2,128,1,float16,float16,8191,0.059683197736740114
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,2,128,1,float16,fp8,8191,0.02874079942703247
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,2,128,1,float16,float16,16383,0.09452319741249085
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,4,128,1,float16,fp8,15,0.008350399881601333
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,4,128,1,float16,float16,1,0.016752000153064727
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,4,128,1,float16,fp8,1,0.008246400207281113
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,4,128,1,float16,float16,3,0.016630400717258454
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,4,128,1,float16,float16,7,0.01661760061979294
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,4,128,1,float16,fp8,7,0.008366400003433227
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,4,128,1,float16,float16,15,0.016598400473594666
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,4,128,1,float16,float16,255,0.016652800142765045
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,4,128,1,float16,float16,31,0.016735999286174773
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,4,128,1,float16,fp8,31,0.008395200222730636
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,4,128,1,float16,float16,63,0.016569599509239197
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,4,128,1,float16,fp8,63,0.008347199857234954
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,4,128,1,float16,float16,127,0.017552000284194947
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,4,128,1,float16,fp8,127,0.00835999995470047
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,4,128,1,float16,fp8,255,0.008347199857234954
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,4,128,1,float16,fp8,2047,0.012555199861526489
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,4,128,1,float16,float16,511,0.01865919977426529
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,4,128,1,float16,fp8,511,0.00846880003809929
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,4,128,1,float16,float16,1023,0.02062560021877289
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,4,128,1,float16,fp8,8191,0.04621759951114655
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,4,128,1,float16,fp8,1023,0.010496000200510025
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,4,128,1,float16,float16,2047,0.04036799967288971
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,4,128,1,float16,float16,4095,0.05957440137863159
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,4,128,1,float16,fp8,4095,0.027980801463127137
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,4,128,1,float16,float16,8191,0.09489120244979858
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,4,128,1,float16,float16,16383,0.16785119771957396
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,8,128,1,float16,float16,1,0.018713599443435668
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,4,128,1,float16,fp8,16383,0.08266239762306213
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,8,128,1,float16,fp8,1,0.008448000252246856
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,8,128,1,float16,float16,3,0.018612800538539885
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,8,128,1,float16,fp8,3,0.008462399989366532
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,8,128,1,float16,float16,7,0.018726399540901183
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,8,128,1,float16,fp8,7,0.008495999872684479
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,8,128,1,float16,float16,15,0.018592000007629395
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,8,128,1,float16,fp8,15,0.008427199721336365
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,8,128,1,float16,float16,31,0.018654400110244752
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,8,128,1,float16,fp8,31,0.008591999858617782
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,8,128,1,float16,float16,63,0.018580800294876097
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,8,128,1,float16,fp8,63,0.008646400272846222
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,8,128,1,float16,float16,127,0.018668800592422485
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,8,128,1,float16,fp8,127,0.008526399731636047
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,8,128,1,float16,float16,255,0.018603199720382692
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,8,128,1,float16,fp8,255,0.008372800052165985
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,8,128,1,float16,float16,511,0.020739200711250304
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,8,128,1,float16,fp8,511,0.010446400195360184
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,8,128,1,float16,float16,1023,0.04082080125808716
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,8,128,1,float16,fp8,1023,0.0126351997256279
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,8,128,1,float16,float16,2047,0.05950400233268738
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,8,128,1,float16,fp8,2047,0.02853280007839203
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,8,128,1,float16,float16,4095,0.09601920247077941
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,8,128,1,float16,fp8,4095,0.04769600033760071
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,8,128,1,float16,float16,8191,0.1688704013824463
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,8,128,1,float16,fp8,8191,0.08911679983139038
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,1,128,1,float16,float16,1,0.01767839938402176
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,1,128,1,float16,fp8,1,0.009683199971914292
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,8,128,1,float16,float16,16383,0.3185487985610962
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,1,128,1,float16,float16,3,0.018592000007629395
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,16,8,128,1,float16,fp8,16383,0.17185280323028565
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,1,128,1,float16,fp8,3,0.01040000021457672
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,1,128,1,float16,float16,7,0.01867839992046356
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,1,128,1,float16,fp8,7,0.0104032002389431
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,1,128,1,float16,float16,15,0.018753600120544434
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,1,128,1,float16,fp8,15,0.0104032002389431
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,1,128,1,float16,float16,31,0.018668800592422485
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,1,128,1,float16,fp8,31,0.009550400078296661
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,1,128,1,float16,float16,63,0.01910720020532608
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,1,128,1,float16,fp8,63,0.01040000021457672
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,1,128,1,float16,float16,127,0.018585599958896637
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,1,128,1,float16,fp8,127,0.010406400263309478
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,1,128,1,float16,float16,255,0.018812799453735353
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,1,128,1,float16,fp8,255,0.010470400005578995
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,1,128,1,float16,float16,511,0.018681600689888
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,1,128,1,float16,fp8,511,0.01043040007352829
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,1,128,1,float16,float16,1023,0.018680000305175783
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,1,128,1,float16,fp8,1023,0.010420800000429154
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,1,128,1,float16,float16,2047,0.018702399730682374
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,1,128,1,float16,fp8,2047,0.010416000336408614
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,1,128,1,float16,float16,4095,0.020712000131607056
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,1,128,1,float16,fp8,4095,0.01168000027537346
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,2,128,1,float16,float16,3,0.016667200624942778
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,1,128,1,float16,float16,8191,0.023496000468730925
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,2,128,1,float16,fp8,3,0.008534400165081025
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,1,128,1,float16,fp8,8191,0.0144896000623703
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,1,128,1,float16,float16,16383,0.024875199794769286
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,1,128,1,float16,fp8,16383,0.016582399606704712
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,2,128,1,float16,float16,1,0.018848000466823576
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,2,128,1,float16,fp8,31,0.00836160033941269
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,2,128,1,float16,fp8,1,0.008436799794435502
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,2,128,1,float16,float16,7,0.016705599427223206
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,2,128,1,float16,fp8,7,0.008455999940633774
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,2,128,1,float16,float16,15,0.01875839978456497
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,2,128,1,float16,fp8,15,0.008363199979066848
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,2,128,1,float16,float16,31,0.016627199947834015
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,2,128,1,float16,float16,63,0.018571199476718904
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,2,128,1,float16,fp8,63,0.008417599648237229
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,2,128,1,float16,float16,127,0.01858399957418442
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,2,128,1,float16,fp8,127,0.008420799672603608
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,2,128,1,float16,float16,255,0.01863519996404648
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,2,128,1,float16,fp8,255,0.008556800335645676
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,2,128,1,float16,float16,511,0.016771200299263
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,2,128,1,float16,fp8,511,0.00844319984316826
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,2,128,1,float16,float16,1023,0.018753600120544434
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,2,128,1,float16,float16,8191,0.020735999941825865
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,2,128,1,float16,fp8,1023,0.008484800159931184
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,2,128,1,float16,float16,2047,0.018775999546051025
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,2,128,1,float16,fp8,2047,0.010507199913263321
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,2,128,1,float16,float16,4095,0.020657600462436677
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,2,128,1,float16,fp8,4095,0.010505600273609162
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,2,128,1,float16,fp8,8191,0.012583999335765839
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,2,128,1,float16,float16,16383,0.024967999756336214
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,2,128,1,float16,fp8,16383,0.012854400277137756
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,4,128,1,float16,float16,1,0.017569600045681
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,4,128,1,float16,fp8,1,0.008417599648237229
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,4,128,1,float16,float16,31,0.017969599366188048
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,4,128,1,float16,float16,3,0.016777600347995757
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,4,128,1,float16,fp8,3,0.008448000252246856
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,4,128,1,float16,float16,7,0.018606400489807128
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,4,128,1,float16,fp8,7,0.0083856001496315
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,4,128,1,float16,float16,15,0.018697600066661834
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,4,128,1,float16,fp8,15,0.008364800363779068
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,4,128,1,float16,fp8,31,0.008528000116348267
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,4,128,1,float16,float16,63,0.017019200325012206
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,4,128,1,float16,fp8,63,0.00859680026769638
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,4,128,1,float16,float16,127,0.018593600392341612
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,4,128,1,float16,fp8,127,0.00851999968290329
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,4,128,1,float16,float16,255,0.01693280041217804
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,4,128,1,float16,fp8,255,0.008355200290679932
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,4,128,1,float16,float16,511,0.018587200343608855
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,4,128,1,float16,fp8,511,0.008422400057315826
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,4,128,1,float16,float16,1023,0.01855839937925339
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,4,128,1,float16,fp8,1023,0.008510400354862214
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,4,128,1,float16,float16,2047,0.018804800510406495
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,4,128,1,float16,fp8,2047,0.010411199927330018
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,4,128,1,float16,float16,4095,0.020776000618934632
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,4,128,1,float16,fp8,4095,0.010489600151777268
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,4,128,1,float16,float16,8191,0.02280000001192093
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,4,128,1,float16,fp8,8191,0.012606400251388549
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,8,128,1,float16,float16,7,0.016550399363040924
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,4,128,1,float16,float16,16383,0.04311679899692535
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,4,128,1,float16,fp8,16383,0.014897599816322327
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,8,128,1,float16,float16,1,0.016680000722408293
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,8,128,1,float16,fp8,1,0.008472000062465668
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,8,128,1,float16,float16,3,0.01659200042486191
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,8,128,1,float16,fp8,3,0.008324799686670303
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,8,128,1,float16,fp8,7,0.008363199979066848
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,8,128,1,float16,float16,15,0.01658879965543747
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,8,128,1,float16,fp8,15,0.00835999995470047
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,8,128,1,float16,float16,31,0.01660960018634796
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,8,128,1,float16,float16,255,0.016505600512027742
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,8,128,1,float16,fp8,31,0.008416000008583068
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,8,128,1,float16,float16,63,0.016681599617004394
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,8,128,1,float16,fp8,63,0.008350399881601333
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,8,128,1,float16,float16,127,0.016539199650287627
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,8,128,1,float16,fp8,127,0.00836160033941269
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,8,128,1,float16,fp8,255,0.008417599648237229
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,8,128,1,float16,float16,511,0.016553600132465363
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,8,128,1,float16,fp8,511,0.008347199857234954
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,8,128,1,float16,float16,1023,0.016780799627304076
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,8,128,1,float16,fp8,1023,0.00846560001373291
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,8,128,1,float16,float16,2047,0.01868959963321686
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,8,128,1,float16,fp8,2047,0.010134399682283402
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,8,128,1,float16,float16,4095,0.020750400424003602
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,8,128,1,float16,fp8,4095,0.01043040007352829
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,8,128,1,float16,float16,8191,0.039673599600791934
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,8,128,1,float16,fp8,8191,0.013988800346851349
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,8,128,1,float16,float16,16383,0.05953599810600281
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,16,8,128,1,float16,fp8,16383,0.028990399837493897
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,1,128,1,float16,float16,1,0.017662400007247926
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,1,128,1,float16,fp8,1,0.008427199721336365
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,1,128,1,float16,fp8,15,0.008329600095748901
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,1,128,1,float16,float16,3,0.017481599748134614
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,1,128,1,float16,fp8,31,0.00843520015478134
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,1,128,1,float16,fp8,3,0.008377599716186523
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,1,128,1,float16,fp8,63,0.008324799686670303
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,1,128,1,float16,float16,7,0.016572800278663636
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,1,128,1,float16,fp8,7,0.008369600027799606
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,1,128,1,float16,float16,15,0.018544000387191773
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,1,128,1,float16,float16,31,0.018692800402641298
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,1,128,1,float16,float16,63,0.01706400066614151
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,1,128,1,float16,float16,127,0.017726400494575502
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,1,128,1,float16,fp8,127,0.008502399921417237
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,1,128,1,float16,float16,255,0.01664000004529953
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,1,128,1,float16,fp8,255,0.008399999886751174
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,1,128,1,float16,float16,511,0.019014400243759156
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,1,128,1,float16,fp8,511,0.008433599770069123
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,1,128,1,float16,float16,1023,0.02266719937324524
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,1,128,1,float16,fp8,1023,0.010462400317192078
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,1,128,1,float16,float16,2047,0.041140800714492796
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,1,128,1,float16,fp8,2047,0.0144896000623703
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,1,128,1,float16,float16,4095,0.0596560001373291
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,1,128,1,float16,fp8,4095,0.02935839891433716
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,1,128,1,float16,float16,8191,0.09553599953651429
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,2,128,1,float16,float16,1,0.018612800538539885
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,1,128,1,float16,fp8,8191,0.046961599588394166
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,2,128,1,float16,fp8,1,0.008561599999666214
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,2,128,1,float16,float16,3,0.018680000305175783
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,2,128,1,float16,fp8,3,0.008486399799585343
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,2,128,1,float16,float16,7,0.018775999546051025
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,2,128,1,float16,fp8,7,0.009169600158929824
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,2,128,1,float16,float16,15,0.018726399540901183
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,2,128,1,float16,fp8,15,0.008659200370311737
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,2,128,1,float16,float16,31,0.01866399943828583
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,2,128,1,float16,fp8,31,0.00897120013833046
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,2,128,1,float16,float16,63,0.0186831995844841
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,2,128,1,float16,fp8,63,0.008502399921417237
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,2,128,1,float16,float16,127,0.018681600689888
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,2,128,1,float16,fp8,127,0.008894400298595428
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,2,128,1,float16,float16,255,0.018646399676799773
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,2,128,1,float16,fp8,255,0.008631999790668487
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,2,128,1,float16,float16,511,0.021823999285697938
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,2,128,1,float16,fp8,511,0.01040479987859726
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,2,128,1,float16,float16,1023,0.04105759859085083
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,2,128,1,float16,fp8,1023,0.013191999495029449
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,2,128,1,float16,fp8,2047,0.028910401463508605
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,2,128,1,float16,float16,2047,0.05957760214805603
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,2,128,1,float16,float16,4095,0.09629439711570739
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,2,128,1,float16,fp8,4095,0.04731360077857971
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,2,128,1,float16,float16,8191,0.1681615948677063
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,4,128,1,float16,float16,1,0.0207056000828743
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,4,128,1,float16,fp8,1,0.0104592002928257
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,2,128,1,float16,fp8,8191,0.08470559716224671
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,4,128,1,float16,float16,3,0.02154400050640106
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,4,128,1,float16,fp8,3,0.010601600259542465
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,4,128,1,float16,float16,7,0.020744000375270844
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,4,128,1,float16,fp8,7,0.010619200021028518
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,4,128,1,float16,float16,15,0.021721599996089934
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,4,128,1,float16,fp8,15,0.010518400371074677
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,4,128,1,float16,float16,31,0.02281759977340698
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,4,128,1,float16,fp8,31,0.010859200358390808
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,4,128,1,float16,float16,63,0.020819200575351714
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,4,128,1,float16,fp8,63,0.010660800337791442
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,4,128,1,float16,float16,127,0.020745599269866945
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,4,128,1,float16,fp8,127,0.01043040007352829
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,4,128,1,float16,float16,255,0.021347199380397797
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,4,128,1,float16,float16,2047,0.0980127990245819
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,4,128,1,float16,fp8,255,0.010491199791431427
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,4,128,1,float16,float16,511,0.041308799386024476
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,4,128,1,float16,fp8,511,0.014454400539398194
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,4,128,1,float16,float16,1023,0.06123999953269958
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,4,128,1,float16,fp8,1023,0.028764799237251282
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,4,128,1,float16,fp8,2047,0.047651201486587524
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,4,128,1,float16,float16,4095,0.17165119647979737
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,4,128,1,float16,fp8,4095,0.08424000144004821
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,8,128,1,float16,float16,1,0.026804798841476442
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,8,128,1,float16,fp8,1,0.014561599493026734
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,4,128,1,float16,float16,8191,0.3227583885192871
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,8,128,1,float16,float16,15,0.02683840095996857
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,4,128,1,float16,fp8,8191,0.15737760066986084
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,8,128,1,float16,float16,3,0.026948800683021544
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,8,128,1,float16,fp8,3,0.014601600170135499
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,8,128,1,float16,float16,7,0.026787200570106508
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,8,128,1,float16,fp8,7,0.014641599357128143
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,8,128,1,float16,fp8,15,0.014758400619029999
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,8,128,1,float16,float16,31,0.02683199942111969
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,8,128,1,float16,fp8,31,0.014664000272750855
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,8,128,1,float16,float16,63,0.02680320143699646
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,8,128,1,float16,fp8,63,0.014588800072669984
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,8,128,1,float16,float16,127,0.026894399523735048
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,8,128,1,float16,fp8,127,0.014707200229167938
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,8,128,1,float16,float16,255,0.04119200110435486
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,8,128,1,float16,fp8,255,0.015017600357532501
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,8,128,1,float16,float16,511,0.059934401512146
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,8,128,1,float16,fp8,511,0.028995200991630554
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,8,128,1,float16,float16,1023,0.09650400280952454
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,8,128,1,float16,fp8,1023,0.04827840030193329
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,8,128,1,float16,float16,2047,0.16927679777145385
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,8,128,1,float16,fp8,2047,0.08588799834251404
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,1,128,1,float16,float16,1,0.020531199872493744
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,1,128,1,float16,fp8,1,0.010411199927330018
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,8,128,1,float16,float16,4095,0.31525919437408445
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,8,128,1,float16,fp8,4095,0.16672639846801757
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,1,128,1,float16,float16,3,0.020751999318599702
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,1,128,1,float16,fp8,3,0.010471999645233154
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,1,128,1,float16,float16,7,0.020627200603485107
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,1,128,1,float16,fp8,31,0.010420800000429154
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,1,128,1,float16,fp8,7,0.01043360009789467
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,8,128,1,float16,fp8,8191,0.32608320713043215
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,1,128,1,float16,float16,15,0.020771199464797975
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,16,8,128,1,float16,float16,8191,0.6066431999206543
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,1,128,1,float16,fp8,127,0.010425599664449692
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,1,128,1,float16,fp8,15,0.010440000146627427
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,1,128,1,float16,float16,31,0.02064799964427948
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,1,128,1,float16,float16,63,0.02072480022907257
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,1,128,1,float16,fp8,63,0.010385599732398988
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,1,128,1,float16,float16,127,0.020627200603485107
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,1,128,1,float16,float16,255,0.020633600652217865
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,1,128,1,float16,fp8,255,0.010407999902963639
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,1,128,1,float16,float16,511,0.022888000309467315
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,1,128,1,float16,fp8,511,0.012406399846076966
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,2,128,1,float16,float16,3,0.022700800001621245
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,1,128,1,float16,float16,1023,0.04309920072555542
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,1,128,1,float16,fp8,1023,0.016630400717258454
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,1,128,1,float16,float16,2047,0.0610368013381958
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,1,128,1,float16,fp8,2047,0.02982879877090454
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,2,128,1,float16,float16,1,0.022742399573326112
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,2,128,1,float16,fp8,1,0.01074720025062561
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,2,128,1,float16,fp8,3,0.010471999645233154
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,2,128,1,float16,float16,7,0.022635200619697572
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,2,128,1,float16,fp8,7,0.010604800283908844
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,2,128,1,float16,float16,15,0.022830399870872497
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,2,128,1,float16,fp8,15,0.011643200367689132
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,2,128,1,float16,float16,31,0.022686399519443512
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,2,128,1,float16,fp8,31,0.01053439974784851
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,2,128,1,float16,float16,63,0.022710399329662324
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,2,128,1,float16,fp8,63,0.010499200224876404
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,2,128,1,float16,float16,127,0.02271360009908676
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,2,128,1,float16,fp8,127,0.01048479974269867
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,2,128,1,float16,float16,255,0.022675199806690215
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,2,128,1,float16,fp8,255,0.01157120019197464
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,2,128,1,float16,float16,511,0.042870399355888364
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,2,128,1,float16,fp8,511,0.01451680064201355
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,2,128,1,float16,float16,1023,0.061615997552871705
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,2,128,1,float16,fp8,1023,0.02919520139694214
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,2,128,1,float16,float16,2047,0.09855200052261352
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,2,128,1,float16,fp8,2047,0.04796639978885651
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,4,128,1,float16,float16,1,0.0268095999956131
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,4,128,1,float16,fp8,1,0.014595200121402741
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,4,128,1,float16,float16,3,0.026790401339530943
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,4,128,1,float16,fp8,3,0.014545600116252898
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,4,128,1,float16,float16,7,0.026767998933792114
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,4,128,1,float16,fp8,7,0.014678399264812469
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,4,128,1,float16,float16,15,0.02690559923648834
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,4,128,1,float16,fp8,15,0.014822399616241455
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,4,128,1,float16,float16,31,0.026824000477790832
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,4,128,1,float16,fp8,31,0.014752000570297241
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,4,128,1,float16,float16,63,0.026782399415969847
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,4,128,1,float16,fp8,63,0.014523200690746307
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,4,128,1,float16,float16,127,0.026878398656845093
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,4,128,1,float16,fp8,127,0.014580799639225006
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,4,128,1,float16,fp8,1023,0.04820800125598908
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,4,128,1,float16,float16,255,0.04264799952507019
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,4,128,1,float16,float16,2047,0.1708016037940979
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,4,128,1,float16,fp8,255,0.015406399965286255
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,4,128,1,float16,float16,511,0.06167839765548706
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,4,128,1,float16,fp8,511,0.02900480031967163
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,4,128,1,float16,float16,1023,0.09843680262565613
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,8,128,1,float16,float16,1,0.03703039884567261
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,4,128,1,float16,fp8,2047,0.0859167993068695
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,8,128,1,float16,fp8,1,0.02287199944257736
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,8,128,1,float16,float16,3,0.03713760077953339
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,8,128,1,float16,fp8,3,0.022835199534893037
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,8,128,1,float16,float16,7,0.03707039952278137
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,8,128,1,float16,fp8,7,0.02276960015296936
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,8,128,1,float16,float16,15,0.0370608001947403
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,8,128,1,float16,fp8,15,0.022892799973487855
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,8,128,1,float16,float16,31,0.03710240125656128
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,8,128,1,float16,fp8,31,0.02295680046081543
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,8,128,1,float16,float16,63,0.039273598790168764
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,8,128,1,float16,fp8,63,0.022679999470710754
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,8,128,1,float16,float16,127,0.044819200038909913
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,8,128,1,float16,fp8,127,0.02279199957847595
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,8,128,1,float16,float16,255,0.06153280138969421
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,8,128,1,float16,fp8,255,0.029246398806571962
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,8,128,1,float16,float16,511,0.09786880016326904
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,1,128,1,float16,float16,1,0.024897600710391998
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,8,128,1,float16,fp8,511,0.048363199830055235
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,8,128,1,float16,float16,1023,0.16987199783325196
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,8,128,1,float16,fp8,1023,0.08580800294876098
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,1,128,1,float16,float16,3,0.024801599979400634
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,8,128,1,float16,float16,2047,0.31444320678710935
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,1,128,1,float16,fp8,1,0.012670400738716125
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,16,8,128,1,float16,fp8,2047,0.1613103985786438
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,1,128,1,float16,fp8,3,0.012547199428081513
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,1,128,1,float16,float16,7,0.024766400456428528
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,1,128,1,float16,fp8,7,0.012559999525547028
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,1,128,1,float16,float16,15,0.024859200417995452
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,1,128,1,float16,float16,127,0.024766400456428528
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,1,128,1,float16,fp8,15,0.012638400495052337
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,1,128,1,float16,float16,31,0.024900799989700316
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,1,128,1,float16,fp8,31,0.012615999579429627
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,1,128,1,float16,float16,63,0.02481600046157837
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,1,128,1,float16,fp8,63,0.0125231996178627
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,1,128,1,float16,fp8,127,0.012534399330615998
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,1,128,1,float16,float16,255,0.025206398963928223
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,1,128,1,float16,fp8,255,0.01249760016798973
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,1,128,1,float16,float16,511,0.044537600874900815
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,1,128,1,float16,fp8,511,0.016638399660587312
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,1,128,1,float16,float16,1023,0.0637615978717804
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,1,128,1,float16,fp8,1023,0.03105599880218506
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,2,128,1,float16,float16,1,0.028803199529647827
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,2,128,1,float16,fp8,1,0.015703999996185304
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,2,128,1,float16,float16,3,0.028944000601768494
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,2,128,1,float16,fp8,3,0.01470080018043518
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,2,128,1,float16,fp8,7,0.014715200662612915
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,2,128,1,float16,float16,7,0.02887679934501648
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,2,128,1,float16,float16,15,0.028932800889015196
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,2,128,1,float16,fp8,15,0.014620800316333771
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,2,128,1,float16,float16,31,0.02884800136089325
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,2,128,1,float16,fp8,31,0.016577599942684172
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,2,128,1,float16,float16,63,0.028825598955154418
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,2,128,1,float16,fp8,511,0.029129600524902342
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,2,128,1,float16,fp8,63,0.014582400023937226
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,2,128,1,float16,float16,127,0.028860801458358766
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,4,128,1,float16,float16,1,0.037939199805259706
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,2,128,1,float16,fp8,127,0.01605280041694641
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,2,128,1,float16,float16,255,0.04521600008010864
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,2,128,1,float16,fp8,255,0.014655999839305878
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,2,128,1,float16,float16,511,0.06362079977989196
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,2,128,1,float16,float16,1023,0.09999359846115112
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,2,128,1,float16,fp8,1023,0.04829759895801544
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,4,128,1,float16,fp8,1,0.02279199957847595
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,4,128,1,float16,float16,3,0.039087998867034915
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,4,128,1,float16,fp8,31,0.022865599393844603
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,4,128,1,float16,fp8,3,0.02292319983243942
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,4,128,1,float16,float16,7,0.038008001446723935
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,4,128,1,float16,fp8,7,0.02274080067873001
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,4,128,1,float16,float16,15,0.03762080073356629
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,4,128,1,float16,fp8,15,0.02295839935541153
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,4,128,1,float16,float16,31,0.03714079856872558
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,4,128,1,float16,fp8,255,0.0288783997297287
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,4,128,1,float16,float16,63,0.0390720009803772
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,4,128,1,float16,fp8,63,0.023209600150585173
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,4,128,1,float16,float16,127,0.045633599162101746
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,4,128,1,float16,fp8,127,0.02289759963750839
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,4,128,1,float16,float16,255,0.06352959871292115
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,4,128,1,float16,float16,511,0.10111360549926758
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,4,128,1,float16,fp8,511,0.04778560101985931
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,8,128,1,float16,fp8,3,0.039073601365089417
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,4,128,1,float16,float16,1023,0.17452319860458373
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,8,128,1,float16,float16,1,0.05758559703826904
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,4,128,1,float16,fp8,1023,0.08515200018882751
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,8,128,1,float16,fp8,1,0.03918080031871796
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,8,128,1,float16,float16,3,0.05770720243453979
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,8,128,1,float16,float16,7,0.05761600136756897
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,8,128,1,float16,fp8,7,0.03914560079574585
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,8,128,1,float16,float16,15,0.0575872004032135
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,8,128,1,float16,fp8,15,0.03914879858493805
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,8,128,1,float16,float16,31,0.060102397203445436
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,8,128,1,float16,fp8,31,0.039208000898361205
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,8,128,1,float16,float16,63,0.06365280151367188
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,8,128,1,float16,fp8,63,0.03915359973907471
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,8,128,1,float16,float16,127,0.06777920126914978
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,8,128,1,float16,fp8,127,0.04118080139160156
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,8,128,1,float16,float16,255,0.10122720003128052
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,8,128,1,float16,fp8,255,0.05131999850273132
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,8,128,1,float16,float16,511,0.17237759828567506
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,8,128,1,float16,fp8,511,0.08634719848632813
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,1,128,1,float16,float16,1,0.01878879964351654
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,1,128,1,float16,fp8,1,0.008710400015115739
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,8,128,1,float16,float16,1023,0.315833592414856
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,1,128,1,float16,float16,3,0.018780800700187682
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,16,8,128,1,float16,fp8,1023,0.16138720512390137
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,1,128,1,float16,fp8,3,0.0095551997423172
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,1,128,1,float16,float16,7,0.018622399866580965
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,1,128,1,float16,fp8,7,0.010406400263309478
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,1,128,1,float16,float16,15,0.01913760006427765
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,1,128,1,float16,fp8,15,0.010390400141477584
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,1,128,1,float16,float16,31,0.01860480010509491
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,1,128,1,float16,fp8,31,0.010417599976062775
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,1,128,1,float16,float16,63,0.01876640021800995
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,1,128,1,float16,fp8,63,0.01043040007352829
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,1,128,1,float16,float16,127,0.018787199258804323
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,1,128,1,float16,fp8,127,0.009369599819183349
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,1,128,1,float16,float16,255,0.01868479996919632
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,1,128,1,float16,fp8,255,0.010411199927330018
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,1,128,1,float16,fp8,2047,0.010576000064611435
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,1,128,1,float16,float16,511,0.018595199286937713
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,1,128,1,float16,fp8,511,0.010355199873447418
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,1,128,1,float16,float16,1023,0.01855040043592453
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,1,128,1,float16,fp8,1023,0.010414399951696397
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,1,128,1,float16,float16,2047,0.020598399639129638
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,1,128,1,float16,float16,4095,0.020688000321388244
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,1,128,1,float16,fp8,4095,0.012452799826860428
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,1,128,1,float16,float16,8191,0.022756800055503845
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,1,128,1,float16,fp8,8191,0.014295999705791474
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,1,128,1,float16,float16,16383,0.02483679950237274
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,1,128,1,float16,fp8,16383,0.015966400504112244
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,2,128,1,float16,fp8,7,0.008369600027799606
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,2,128,1,float16,float16,1,0.017654399573802947
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,2,128,1,float16,fp8,1,0.00846560001373291
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,2,128,1,float16,float16,3,0.016761599481105803
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,2,128,1,float16,fp8,3,0.008423999696969987
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,2,128,1,float16,float16,7,0.01868959963321686
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,2,128,1,float16,float16,15,0.018515199422836304
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,2,128,1,float16,fp8,15,0.008377599716186523
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,2,128,1,float16,float16,31,0.018595199286937713
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,2,128,1,float16,fp8,31,0.008473599702119828
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,2,128,1,float16,float16,63,0.016737599670886994
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,2,128,1,float16,fp8,63,0.008352000266313553
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,2,128,1,float16,float16,127,0.018606400489807128
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,2,128,1,float16,fp8,127,0.008374399691820144
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,2,128,1,float16,float16,255,0.01703999936580658
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,2,128,1,float16,fp8,255,0.008524800091981888
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,2,128,1,float16,float16,511,0.018580800294876097
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,2,128,1,float16,fp8,511,0.008523199707269669
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,2,128,1,float16,float16,1023,0.018694399297237395
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,2,128,1,float16,fp8,8191,0.01252480000257492
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,2,128,1,float16,fp8,1023,0.00854559987783432
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,2,128,1,float16,fp8,2047,0.010428799688816071
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,2,128,1,float16,float16,4095,0.020716799795627593
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,2,128,1,float16,float16,2047,0.018769599497318268
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,2,128,1,float16,fp8,4095,0.010609599947929382
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,2,128,1,float16,float16,8191,0.02289759963750839
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,2,128,1,float16,float16,16383,0.04342719912528992
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,2,128,1,float16,fp8,16383,0.014611199498176575
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,4,128,1,float16,float16,1,0.016599999368190767
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,4,128,1,float16,fp8,1,0.00833119973540306
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,4,128,1,float16,float16,31,0.016599999368190767
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,4,128,1,float16,float16,3,0.01671839952468872
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,4,128,1,float16,float16,7,0.016543999314308167
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,4,128,1,float16,fp8,7,0.008408000320196151
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,4,128,1,float16,float16,15,0.016601599752902985
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,4,128,1,float16,fp8,3,0.008352000266313553
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,4,128,1,float16,fp8,15,0.008372800052165985
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,4,128,1,float16,fp8,31,0.008372800052165985
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,4,128,1,float16,float16,63,0.016545599699020384
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,4,128,1,float16,fp8,63,0.008441600203514098
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,4,128,1,float16,float16,127,0.01666080057621002
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,4,128,1,float16,fp8,127,0.00836160033941269
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,4,128,1,float16,float16,2047,0.018667200207710268
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,4,128,1,float16,float16,255,0.016711999475955964
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,4,128,1,float16,fp8,2047,0.010414399951696397
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,4,128,1,float16,float16,4095,0.020665599405765532
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,4,128,1,float16,fp8,255,0.00833280012011528
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,4,128,1,float16,float16,511,0.016577599942684172
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,4,128,1,float16,fp8,511,0.008393599838018417
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,4,128,1,float16,float16,1023,0.016732800006866454
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,4,128,1,float16,fp8,16383,0.028812798857688903
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,4,128,1,float16,fp8,1023,0.008366400003433227
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,4,128,1,float16,fp8,4095,0.010487999767065048
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,4,128,1,float16,float16,8191,0.0412880003452301
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,4,128,1,float16,fp8,8191,0.014534400403499603
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,4,128,1,float16,float16,16383,0.05784479975700378
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,8,128,1,float16,float16,1,0.01860159933567047
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,8,128,1,float16,fp8,1,0.00843999981880188
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,8,128,1,float16,float16,3,0.01793919950723648
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,8,128,1,float16,fp8,31,0.008558399975299835
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,8,128,1,float16,fp8,3,0.00841120034456253
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,8,128,1,float16,float16,7,0.016748799383640288
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,8,128,1,float16,fp8,7,0.00843840017914772
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,8,128,1,float16,float16,15,0.017071999609470367
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,8,128,1,float16,fp8,15,0.008532799780368805
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,8,128,1,float16,float16,31,0.018638400733470915
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,8,128,1,float16,float16,63,0.0176144003868103
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,8,128,1,float16,fp8,63,0.008558399975299835
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,8,128,1,float16,float16,127,0.018639999628067016
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,8,128,1,float16,fp8,127,0.008449599891901017
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,8,128,1,float16,float16,255,0.01828639954328537
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,8,128,1,float16,fp8,255,0.008475200086832047
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,8,128,1,float16,float16,511,0.01857600063085556
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,8,128,1,float16,fp8,511,0.00851840004324913
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,8,128,1,float16,float16,1023,0.018721599876880646
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,8,128,1,float16,float16,8191,0.05960639715194702
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,8,128,1,float16,fp8,1023,0.009560000151395798
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,8,128,1,float16,float16,2047,0.020683200657367708
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,8,128,1,float16,fp8,2047,0.0104592002928257
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,8,128,1,float16,float16,4095,0.041116800904273984
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,8,128,1,float16,fp8,4095,0.013158400356769562
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,8,128,1,float16,fp8,8191,0.027793601155281067
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,8,128,1,float16,float16,16383,0.09589599967002868
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,16,8,128,1,float16,fp8,16383,0.04663679897785187
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,1,128,1,float16,float16,1,0.031656000018119815
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,1,128,1,float16,fp8,1,0.018675200641155243
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,1,128,1,float16,float16,3,0.03099679946899414
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,1,128,1,float16,fp8,3,0.018297599256038667
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,1,128,1,float16,float16,7,0.031092798709869383
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,1,128,1,float16,fp8,7,0.018321600556373597
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,1,128,1,float16,float16,15,0.030904000997543334
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,1,128,1,float16,fp8,15,0.01857919991016388
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,1,128,1,float16,float16,31,0.03128480017185211
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,1,128,1,float16,fp8,31,0.018603199720382692
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,1,128,1,float16,float16,255,0.04528320133686066
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,1,128,1,float16,float16,63,0.030987200140953065
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,1,128,1,float16,fp8,63,0.018603199720382692
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,1,128,1,float16,float16,127,0.03258880078792572
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,1,128,1,float16,fp8,127,0.01858399957418442
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,1,128,1,float16,fp8,255,0.018617600202560425
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,1,128,1,float16,float16,511,0.06549919843673706
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,2,128,1,float16,float16,1,0.037134400010108946
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,1,128,1,float16,fp8,511,0.030979201197624207
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,2,128,1,float16,fp8,1,0.024963200092315674
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,2,128,1,float16,float16,3,0.03855679929256439
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,2,128,1,float16,fp8,3,0.024828800559043886
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,2,128,1,float16,float16,7,0.037747201323509214
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,2,128,1,float16,fp8,7,0.02470880001783371
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,2,128,1,float16,float16,15,0.03816480040550232
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,2,128,1,float16,fp8,15,0.02476319968700409
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,2,128,1,float16,float16,31,0.038464000821113585
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,2,128,1,float16,fp8,31,0.024639999866485594
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,2,128,1,float16,float16,63,0.041176000237464906
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,2,128,1,float16,fp8,63,0.0247856006026268
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,2,128,1,float16,float16,127,0.0472351998090744
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,2,128,1,float16,fp8,127,0.02471359968185425
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,2,128,1,float16,float16,255,0.0636672019958496
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,2,128,1,float16,fp8,255,0.03087199926376343
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,2,128,1,float16,float16,511,0.10106240510940552
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,2,128,1,float16,fp8,511,0.048583999276161194
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,4,128,1,float16,float16,1,0.05753920078277588
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,4,128,1,float16,fp8,1,0.039366400241851805
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,4,128,1,float16,float16,3,0.05751519799232483
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,4,128,1,float16,fp8,3,0.0392655998468399
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,4,128,1,float16,float16,7,0.0575007975101471
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,4,128,1,float16,fp8,7,0.039952000975608824
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,4,128,1,float16,float16,15,0.057524800300598145
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,4,128,1,float16,fp8,15,0.039155200123786926
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,4,128,1,float16,float16,31,0.061406397819519044
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,4,128,1,float16,fp8,31,0.039827200770378116
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,4,128,1,float16,float16,63,0.06383839845657349
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,4,128,1,float16,fp8,63,0.039617601037025454
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,4,128,1,float16,float16,127,0.06802240014076233
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,4,128,1,float16,fp8,127,0.041993600130081174
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,4,128,1,float16,float16,255,0.1017583966255188
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,4,128,1,float16,fp8,255,0.05037279725074768
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,4,128,1,float16,float16,511,0.17447999715805054
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,4,128,1,float16,fp8,511,0.08634080290794373
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,8,128,1,float16,float16,1,0.10004639625549316
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,8,128,1,float16,fp8,1,0.07190560102462769
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,8,128,1,float16,fp8,15,0.07293760180473327
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,8,128,1,float16,float16,3,0.10018080472946167
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,8,128,1,float16,fp8,3,0.07185279726982116
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,8,128,1,float16,float16,7,0.1002303957939148
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,8,128,1,float16,fp8,7,0.07191839814186096
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,8,128,1,float16,float16,15,0.10696959495544434
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,8,128,1,float16,float16,31,0.10804320573806762
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,8,128,1,float16,fp8,31,0.07186400294303893
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,8,128,1,float16,float16,63,0.10818719863891602
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,8,128,1,float16,fp8,63,0.0722815990447998
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,8,128,1,float16,float16,127,0.11518080234527588
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,8,128,1,float16,fp8,127,0.07378079891204833
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,8,128,1,float16,float16,255,0.18217120170593262
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,8,128,1,float16,fp8,511,0.16200799942016603
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,8,128,1,float16,fp8,255,0.09280959963798523
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,1,128,1,float16,float16,1,0.045259198546409606
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,1,128,1,float16,fp8,1,0.028870400786399842
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,1,128,1,float16,fp8,15,0.028934401273727418
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,16,8,128,1,float16,float16,511,0.3257807970046997
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,1,128,1,float16,float16,3,0.0435344010591507
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,1,128,1,float16,fp8,3,0.029016000032424927
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,1,128,1,float16,fp8,7,0.028862398862838746
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,1,128,1,float16,float16,7,0.04488160014152527
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,1,128,1,float16,float16,15,0.04406079947948456
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,1,128,1,float16,fp8,31,0.028942400217056276
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,1,128,1,float16,float16,31,0.04452959895133972
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,1,128,1,float16,float16,63,0.04737280011177063
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,1,128,1,float16,fp8,63,0.028830400109291075
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,1,128,1,float16,float16,127,0.05141440033912659
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,2,128,1,float16,fp8,3,0.041254401206970215
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,1,128,1,float16,fp8,127,0.0289792001247406
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,1,128,1,float16,float16,255,0.06652799844741822
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,1,128,1,float16,fp8,255,0.03320319950580597
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,2,128,1,float16,float16,1,0.057657599449157715
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,2,128,1,float16,fp8,1,0.041284799575805664
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,2,128,1,float16,float16,3,0.05760480165481567
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,2,128,1,float16,float16,7,0.057601600885391235
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,2,128,1,float16,fp8,7,0.04116320013999939
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,2,128,1,float16,float16,15,0.05764480233192444
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,2,128,1,float16,fp8,15,0.04120799899101257
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,2,128,1,float16,float16,31,0.06027519702911377
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,2,128,1,float16,fp8,31,0.04133760035037994
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,2,128,1,float16,float16,63,0.06455519795417786
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,2,128,1,float16,fp8,63,0.041315200924873355
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,2,128,1,float16,float16,127,0.06928640007972717
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,2,128,1,float16,fp8,127,0.04346559941768646
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,2,128,1,float16,float16,255,0.10244959592819214
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,2,128,1,float16,fp8,255,0.053895998001098636
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,4,128,1,float16,float16,1,0.09997439980506898
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,4,128,1,float16,fp8,1,0.07195199728012085
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,4,128,1,float16,float16,3,0.10052319765090942
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,4,128,1,float16,fp8,3,0.0721552014350891
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,4,128,1,float16,float16,31,0.1069424033164978
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,4,128,1,float16,fp8,31,0.07335039973258972
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,4,128,1,float16,float16,7,0.09928479790687561
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,4,128,1,float16,fp8,7,0.0720624029636383
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,4,128,1,float16,float16,15,0.10841280221939087
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,4,128,1,float16,fp8,15,0.07201279997825623
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,4,128,1,float16,float16,63,0.10905120372772217
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,4,128,1,float16,fp8,63,0.07399839758872986
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,4,128,1,float16,float16,127,0.1167296051979065
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,4,128,1,float16,fp8,127,0.07477440237998963
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,4,128,1,float16,float16,255,0.1833184003829956
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,4,128,1,float16,fp8,255,0.09247199892997741
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,8,128,1,float16,float16,1,0.18058240413665771
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,8,128,1,float16,fp8,1,0.13540480136871338
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,8,128,1,float16,float16,3,0.18084479570388795
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,8,128,1,float16,fp8,15,0.1354464054107666
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,8,128,1,float16,fp8,3,0.13551199436187744
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,8,128,1,float16,float16,7,0.1906831979751587
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,8,128,1,float16,fp8,7,0.1354207992553711
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,8,128,1,float16,float16,15,0.19084479808807372
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,8,128,1,float16,float16,31,0.19254560470581056
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,8,128,1,float16,fp8,31,0.13600159883499147
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,8,128,1,float16,float16,63,0.19192320108413696
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,8,128,1,float16,fp8,63,0.13630399703979493
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,8,128,1,float16,float16,127,0.2046367883682251
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,8,128,1,float16,fp8,127,0.1374384045600891
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,1,128,1,float16,float16,1,0.018614399433135986
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,1,128,1,float16,fp8,1,0.009035199880599976
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,1,128,1,float16,float16,3,0.018593600392341612
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,8,128,1,float16,float16,255,0.33741440773010256
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,1,128,1,float16,fp8,3,0.010425599664449692
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,16,8,128,1,float16,fp8,255,0.17770880460739136
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,1,128,1,float16,float16,7,0.018833599984645844
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,1,128,1,float16,fp8,7,0.010454399883747101
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,1,128,1,float16,fp8,63,0.010377600044012069
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,1,128,1,float16,float16,15,0.0186831995844841
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,1,128,1,float16,fp8,15,0.00947519987821579
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,1,128,1,float16,float16,31,0.018585599958896637
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,1,128,1,float16,float16,511,0.018680000305175783
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,1,128,1,float16,fp8,31,0.010487999767065048
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,1,128,1,float16,float16,63,0.018593600392341612
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,1,128,1,float16,float16,127,0.018702399730682374
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,1,128,1,float16,fp8,127,0.010355199873447418
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,1,128,1,float16,fp8,2047,0.010419200360774993
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,1,128,1,float16,float16,255,0.018588800728321076
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,1,128,1,float16,fp8,255,0.010395199805498124
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,1,128,1,float16,fp8,511,0.01043040007352829
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,1,128,1,float16,float16,1023,0.018716800212860107
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,1,128,1,float16,fp8,1023,0.010382399708032609
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,1,128,1,float16,float16,2047,0.020734399557113647
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,1,128,1,float16,float16,4095,0.022678400576114654
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,2,128,1,float16,fp8,1,0.00843840017914772
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,1,128,1,float16,fp8,4095,0.012521600723266602
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,1,128,1,float16,float16,8191,0.0247311994433403
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,1,128,1,float16,fp8,8191,0.01462399959564209
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,1,128,1,float16,float16,16383,0.04531359970569611
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,1,128,1,float16,fp8,16383,0.018651199340820313
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,2,128,1,float16,float16,1,0.01664479970932007
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,2,128,1,float16,float16,3,0.016638399660587312
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,2,128,1,float16,fp8,3,0.008433599770069123
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,2,128,1,float16,float16,7,0.016577599942684172
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,2,128,1,float16,fp8,7,0.008371199667453765
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,2,128,1,float16,float16,15,0.016622400283813475
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,2,128,1,float16,fp8,15,0.00833119973540306
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,2,128,1,float16,float16,31,0.016729600727558136
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,2,128,1,float16,fp8,31,0.008475200086832047
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,2,128,1,float16,float16,63,0.01671839952468872
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,2,128,1,float16,fp8,63,0.008555199950933456
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,2,128,1,float16,float16,1023,0.018571199476718904
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,2,128,1,float16,float16,127,0.016686399281024934
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,2,128,1,float16,fp8,127,0.008406399935483932
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,2,128,1,float16,float16,255,0.016734400391578676
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,2,128,1,float16,fp8,255,0.008395200222730636
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,2,128,1,float16,fp8,4095,0.01043360009789467
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,2,128,1,float16,float16,511,0.01661760061979294
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,2,128,1,float16,float16,8191,0.04037120044231415
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,2,128,1,float16,fp8,511,0.008414400368928909
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,2,128,1,float16,fp8,1023,0.00830719992518425
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,2,128,1,float16,float16,2047,0.018719999492168425
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,2,128,1,float16,fp8,2047,0.010411199927330018
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,2,128,1,float16,float16,4095,0.020751999318599702
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,2,128,1,float16,fp8,8191,0.013748799264431
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,2,128,1,float16,fp8,16383,0.028921601176261903
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,2,128,1,float16,float16,16383,0.05917919874191284
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,4,128,1,float16,float16,1,0.017884799838066102
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,4,128,1,float16,fp8,1,0.008363199979066848
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,4,128,1,float16,float16,3,0.01720159947872162
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,4,128,1,float16,fp8,3,0.008396799862384795
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,4,128,1,float16,float16,7,0.018545599281787874
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,4,128,1,float16,fp8,7,0.008432000130414962
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,4,128,1,float16,float16,63,0.016977599263191222
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,4,128,1,float16,float16,15,0.016595199704170227
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,4,128,1,float16,fp8,15,0.0083856001496315
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,4,128,1,float16,float16,31,0.018615999817848207
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,4,128,1,float16,fp8,31,0.008499199897050858
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,4,128,1,float16,fp8,63,0.008355200290679932
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,4,128,1,float16,float16,127,0.01788160055875778
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,4,128,1,float16,fp8,127,0.00854720026254654
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,4,128,1,float16,float16,255,0.016731199622154237
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,4,128,1,float16,fp8,255,0.008395200222730636
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,4,128,1,float16,float16,511,0.018571199476718904
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,4,128,1,float16,fp8,511,0.008347199857234954
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,4,128,1,float16,float16,1023,0.019171200692653656
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,4,128,1,float16,fp8,1023,0.008481600135564805
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,4,128,1,float16,float16,2047,0.020839999616146087
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,4,128,1,float16,fp8,2047,0.010507199913263321
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,4,128,1,float16,float16,4095,0.04100320041179657
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,4,128,1,float16,fp8,16383,0.04725280106067657
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,4,128,1,float16,fp8,4095,0.014425599575042724
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,4,128,1,float16,float16,8191,0.05960320234298706
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,4,128,1,float16,fp8,8191,0.02826080024242401
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,4,128,1,float16,float16,16383,0.09521440267562867
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,8,128,1,float16,float16,1,0.01674720048904419
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,8,128,1,float16,fp8,1,0.008292800188064576
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,8,128,1,float16,fp8,15,0.00841120034456253
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,8,128,1,float16,float16,3,0.016846400499343873
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,8,128,1,float16,fp8,3,0.008336000144481659
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,8,128,1,float16,float16,7,0.01720000058412552
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,8,128,1,float16,fp8,7,0.008395200222730636
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,8,128,1,float16,float16,127,0.01804800033569336
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,8,128,1,float16,float16,15,0.01664479970932007
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,8,128,1,float16,fp8,255,0.008323200047016144
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,8,128,1,float16,float16,255,0.016991999745368958
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,8,128,1,float16,float16,31,0.016771200299263
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,8,128,1,float16,fp8,31,0.008422400057315826
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,8,128,1,float16,float16,63,0.01661760061979294
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,8,128,1,float16,fp8,63,0.008352000266313553
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,8,128,1,float16,fp8,127,0.008472000062465668
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,8,128,1,float16,float16,511,0.01868479996919632
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,8,128,1,float16,fp8,511,0.00833439975976944
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,8,128,1,float16,float16,1023,0.020636799931526183
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,8,128,1,float16,fp8,1023,0.010407999902963639
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,8,128,1,float16,float16,2047,0.041075199842453
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,8,128,1,float16,fp8,2047,0.012639999389648438
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,8,128,1,float16,float16,4095,0.05891039967536926
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,8,128,1,float16,fp8,4095,0.026976001262664796
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,8,128,1,float16,float16,8191,0.09494559764862061
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,8,128,1,float16,fp8,8191,0.04560160040855408
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,1,128,1,float16,float16,1,0.017089599370956422
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,8,128,1,float16,float16,16383,0.1669119954109192
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,1,128,1,float16,fp8,1,0.008393599838018417
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,16,8,128,1,float16,fp8,16383,0.08381440043449402
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,1,128,1,float16,float16,31,0.018488000333309173
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,1,128,1,float16,float16,3,0.016564799845218657
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,1,128,1,float16,fp8,31,0.008318399637937545
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,1,128,1,float16,fp8,3,0.008478400111198426
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,1,128,1,float16,float16,7,0.016676799952983858
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,1,128,1,float16,fp8,7,0.008312000334262848
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,1,128,1,float16,float16,15,0.016812799870967864
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,1,128,1,float16,fp8,15,0.008377599716186523
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,1,128,1,float16,float16,63,0.01666239947080612
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,1,128,1,float16,fp8,63,0.00843999981880188
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,1,128,1,float16,float16,127,0.018433600664138794
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,1,128,1,float16,fp8,127,0.008444800227880477
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,1,128,1,float16,float16,255,0.01664319932460785
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,1,128,1,float16,fp8,255,0.00835840031504631
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,1,128,1,float16,float16,511,0.01675039976835251
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,1,128,1,float16,fp8,511,0.00846719965338707
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,1,128,1,float16,float16,1023,0.01860959976911545
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,1,128,1,float16,fp8,1023,0.010419200360774993
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,1,128,1,float16,float16,2047,0.020657600462436677
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,1,128,1,float16,fp8,2047,0.01045600026845932
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,1,128,1,float16,float16,4095,0.04121440052986145
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,1,128,1,float16,fp8,4095,0.014574399590492249
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,1,128,1,float16,float16,8191,0.05973119735717773
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,1,128,1,float16,fp8,8191,0.028579199314117433
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,1,128,1,float16,float16,16383,0.09550880193710327
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,1,128,1,float16,fp8,16383,0.0468207985162735
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,1,128,1,float16,float16,32767,0.1673151969909668
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,2,128,1,float16,float16,1,0.016628800332546233
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,1,128,1,float16,fp8,32767,0.08274400234222412
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,2,128,1,float16,fp8,1,0.008336000144481659
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,2,128,1,float16,float16,3,0.0166703999042511
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,2,128,1,float16,fp8,3,0.008355200290679932
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,2,128,1,float16,float16,63,0.016732800006866454
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,2,128,1,float16,float16,7,0.01676799952983856
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,2,128,1,float16,fp8,7,0.008348800241947174
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,2,128,1,float16,float16,15,0.01675360053777695
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,2,128,1,float16,fp8,15,0.00835679993033409
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,2,128,1,float16,float16,31,0.01695840060710907
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,2,128,1,float16,fp8,31,0.008430399745702744
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,2,128,1,float16,fp8,63,0.008348800241947174
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,2,128,1,float16,float16,127,0.016655999422073364
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,2,128,1,float16,fp8,127,0.008369600027799606
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,2,128,1,float16,float16,255,0.016859200596809388
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,2,128,1,float16,fp8,255,0.008387199789285659
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,2,128,1,float16,float16,511,0.018588800728321076
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,2,128,1,float16,fp8,511,0.008491200208663941
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,2,128,1,float16,float16,1023,0.02064319998025894
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,2,128,1,float16,fp8,1023,0.01053439974784851
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,2,128,1,float16,float16,2047,0.04055519998073578
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,2,128,1,float16,fp8,2047,0.012651200592517852
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,2,128,1,float16,float16,4095,0.058297598361968996
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,2,128,1,float16,fp8,4095,0.027260801196098326
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,2,128,1,float16,float16,8191,0.09461439847946167
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,2,128,1,float16,fp8,8191,0.04584639966487884
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,2,128,1,float16,float16,16383,0.1663599967956543
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,2,128,1,float16,fp8,16383,0.08384640216827392
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,4,128,1,float16,float16,1,0.01871040016412735
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,4,128,1,float16,fp8,1,0.008475200086832047
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,2,128,1,float16,float16,32767,0.3108560085296631
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,2,128,1,float16,fp8,32767,0.15546879768371583
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,4,128,1,float16,float16,3,0.018755200505256652
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,4,128,1,float16,fp8,3,0.00846880003809929
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,4,128,1,float16,float16,7,0.01879040002822876
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,4,128,1,float16,fp8,31,0.008691199868917466
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,4,128,1,float16,fp8,7,0.008566399663686752
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,4,128,1,float16,float16,15,0.018670399487018586
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,4,128,1,float16,fp8,15,0.008540800213813782
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,4,128,1,float16,float16,31,0.01857600063085556
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,4,128,1,float16,float16,63,0.018745599687099455
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,4,128,1,float16,fp8,63,0.008420799672603608
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,4,128,1,float16,float16,127,0.018783999979496
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,4,128,1,float16,fp8,511,0.010471999645233154
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,4,128,1,float16,fp8,127,0.008561599999666214
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,4,128,1,float16,fp8,1023,0.01383039951324463
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,4,128,1,float16,float16,255,0.018617600202560425
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,4,128,1,float16,fp8,255,0.008371199667453765
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,4,128,1,float16,float16,511,0.02114560008049011
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,4,128,1,float16,float16,1023,0.041022399067878725
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,4,128,1,float16,float16,2047,0.05957919955253601
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,4,128,1,float16,fp8,2047,0.028251200914382935
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,4,128,1,float16,float16,4095,0.0957759976387024
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,4,128,1,float16,fp8,4095,0.04726560115814209
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,1,128,1,float16,float16,1,0.014579200744628906
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,4,128,1,float16,float16,16383,0.3155200004577637
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,1,128,1,float16,float16,3,0.014532800018787383
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,4,128,1,float16,float16,8191,0.1701632022857666
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,4,128,1,float16,fp8,8191,0.084443199634552
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,4,128,1,float16,fp8,16383,0.15966720581054689
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,1,128,1,float16,fp8,1,0.008355200290679932
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,1,128,1,float16,float16,7,0.014715200662612915
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,1,128,1,float16,fp8,3,0.008454400300979614
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,1,128,1,float16,fp8,7,0.008348800241947174
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,4,128,1,float16,fp8,32767,0.3100496053695679
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,8,4,128,1,float16,float16,32767,0.6101823806762695
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,1,128,1,float16,float16,15,0.013583999872207642
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,1,128,1,float16,float16,31,0.014580799639225006
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,1,128,1,float16,fp8,15,0.008441600203514098
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,1,128,1,float16,fp8,255,0.008390399813652038
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,1,128,1,float16,fp8,31,0.008364800363779068
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,1,128,1,float16,float16,63,0.014550399780273438
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,1,128,1,float16,fp8,63,0.008352000266313553
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,1,128,1,float16,float16,127,0.012673600018024445
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,1,128,1,float16,float16,255,0.012656000256538392
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,1,128,1,float16,fp8,2047,0.008454400300979614
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,1,128,1,float16,fp8,127,0.008511999994516373
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,1,128,1,float16,float16,511,0.014556799829006196
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,1,128,1,float16,fp8,511,0.00846880003809929
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,1,128,1,float16,float16,1023,0.014678399264812469
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,1,128,1,float16,fp8,1023,0.008420799672603608
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,1,128,1,float16,float16,2047,0.014519999921321868
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,1,128,1,float16,float16,4095,0.01655520051717758
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,1,128,1,float16,fp8,4095,0.009200000017881394
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,1,128,1,float16,float16,8191,0.01709599941968918
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,1,128,1,float16,fp8,8191,0.012438400089740754
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,1,128,1,float16,float16,16383,0.02268960028886795
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,1,128,1,float16,fp8,16383,0.01666879951953888
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,1,128,1,float16,float16,32767,0.030723199248313904
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,1,128,1,float16,fp8,32767,0.024579200148582458
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,2,128,1,float16,float16,1,0.014511999487876893
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,2,128,1,float16,fp8,1,0.008470399677753449
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,2,128,1,float16,float16,3,0.014697599411010741
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,2,128,1,float16,fp8,3,0.008382400125265121
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,2,128,1,float16,float16,7,0.01658560037612915
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,2,128,1,float16,fp8,7,0.008455999940633774
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,2,128,1,float16,float16,15,0.014699199795722961
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,2,128,1,float16,fp8,15,0.008323200047016144
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,2,128,1,float16,float16,31,0.014475199580192565
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,2,128,1,float16,fp8,31,0.008343999832868576
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,2,128,1,float16,float16,63,0.014500799775123595
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,2,128,1,float16,fp8,63,0.008366400003433227
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,2,128,1,float16,float16,127,0.01650879979133606
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,2,128,1,float16,fp8,127,0.00846560001373291
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,2,128,1,float16,float16,255,0.016076800227165223
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,2,128,1,float16,fp8,255,0.00835679993033409
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,2,128,1,float16,float16,4095,0.017696000635623932
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,2,128,1,float16,float16,511,0.01446239948272705
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,2,128,1,float16,float16,8191,0.01860000044107437
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,2,128,1,float16,fp8,511,0.008511999994516373
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,2,128,1,float16,float16,1023,0.014593599736690522
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,2,128,1,float16,fp8,1023,0.008371199667453765
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,2,128,1,float16,float16,2047,0.016574400663375854
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,2,128,1,float16,fp8,2047,0.009785600006580353
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,2,128,1,float16,fp8,4095,0.010396800190210342
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,2,128,1,float16,fp8,8191,0.012467200309038163
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,2,128,1,float16,float16,16383,0.023500800132751465
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,2,128,1,float16,fp8,16383,0.016574400663375854
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,2,128,1,float16,float16,32767,0.025009599328041077
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,2,128,1,float16,fp8,32767,0.01868959963321686
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,4,128,1,float16,float16,1,0.018607999384403228
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,4,128,1,float16,fp8,1,0.008425600081682205
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,4,128,1,float16,float16,3,0.018606400489807128
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,4,128,1,float16,fp8,3,0.00851999968290329
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,4,128,1,float16,float16,7,0.016659200191497803
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,4,128,1,float16,fp8,7,0.008452799916267396
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,4,128,1,float16,float16,15,0.018612800538539885
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,4,128,1,float16,fp8,15,0.008433599770069123
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,4,128,1,float16,float16,31,0.016873599588871004
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,4,128,1,float16,fp8,31,0.008615999668836593
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,4,128,1,float16,float16,63,0.01868479996919632
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,4,128,1,float16,fp8,63,0.008462399989366532
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,4,128,1,float16,float16,127,0.016729600727558136
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,4,128,1,float16,float16,1023,0.01656319946050644
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,4,128,1,float16,fp8,127,0.008419200032949447
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,4,128,1,float16,fp8,1023,0.008457600325345992
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,4,128,1,float16,float16,2047,0.016732800006866454
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,4,128,1,float16,float16,255,0.018670399487018586
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,4,128,1,float16,fp8,255,0.008452799916267396
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,4,128,1,float16,float16,511,0.017827199399471284
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,4,128,1,float16,fp8,511,0.008425600081682205
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,4,128,1,float16,float16,16383,0.022894400358200073
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,4,128,1,float16,fp8,2047,0.010420800000429154
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,4,128,1,float16,float16,4095,0.018795199692249298
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,4,128,1,float16,fp8,4095,0.010425599664449692
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,4,128,1,float16,float16,8191,0.02269120067358017
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,4,128,1,float16,fp8,8191,0.012638400495052337
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,4,128,1,float16,fp8,16383,0.014679999649524688
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,4,128,1,float16,float16,32767,0.025911998748779298
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,8,4,128,1,float16,fp8,32767,0.01658719927072525
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,1,128,1,float16,float16,1,0.014664000272750855
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,1,128,1,float16,fp8,1,0.008694399893283845
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,1,128,1,float16,float16,3,0.014595200121402741
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,1,128,1,float16,fp8,3,0.00843999981880188
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,1,128,1,float16,float16,7,0.01449279934167862
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,1,128,1,float16,fp8,7,0.008340799808502197
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,1,128,1,float16,float16,15,0.014545600116252898
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,1,128,1,float16,float16,31,0.014667199552059173
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,1,128,1,float16,fp8,15,0.008449599891901017
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,1,128,1,float16,fp8,31,0.008513599634170532
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,1,128,1,float16,fp8,255,0.008470399677753449
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,1,128,1,float16,float16,63,0.014475199580192565
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,1,128,1,float16,fp8,63,0.008388800173997879
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,1,128,1,float16,float16,127,0.014500799775123595
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,1,128,1,float16,fp8,127,0.008345600217580795
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,1,128,1,float16,float16,255,0.014601600170135499
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,1,128,1,float16,float16,511,0.016521599888801575
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,1,128,1,float16,fp8,511,0.008457600325345992
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,1,128,1,float16,float16,1023,0.014596800506114959
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,1,128,1,float16,fp8,1023,0.008488000184297562
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,1,128,1,float16,float16,2047,0.01661760061979294
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,1,128,1,float16,fp8,2047,0.008691199868917466
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,1,128,1,float16,float16,4095,0.016547200083732606
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,1,128,1,float16,fp8,4095,0.01043040007352829
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,1,128,1,float16,float16,8191,0.018671999871730804
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,1,128,1,float16,fp8,8191,0.012492799758911132
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,1,128,1,float16,float16,16383,0.022729599475860597
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,1,128,1,float16,fp8,16383,0.016550399363040924
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,1,128,1,float16,float16,32767,0.026982399821281432
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,1,128,1,float16,fp8,32767,0.018721599876880646
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,2,128,1,float16,float16,1,0.018590399622917177
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,2,128,1,float16,fp8,1,0.008430399745702744
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,2,128,1,float16,float16,3,0.018598400056362152
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,2,128,1,float16,fp8,3,0.008575999736785888
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,2,128,1,float16,float16,7,0.01675360053777695
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,2,128,1,float16,fp8,7,0.008406399935483932
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,2,128,1,float16,float16,127,0.016651199758052827
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,2,128,1,float16,float16,15,0.018603199720382692
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,2,128,1,float16,fp8,15,0.008489599823951722
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,2,128,1,float16,float16,31,0.0186271995306015
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,2,128,1,float16,fp8,31,0.008371199667453765
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,2,128,1,float16,float16,63,0.01857600063085556
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,2,128,1,float16,float16,1023,0.016734400391578676
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,2,128,1,float16,fp8,63,0.008369600027799606
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,2,128,1,float16,fp8,127,0.00846880003809929
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,2,128,1,float16,float16,255,0.016607999801635742
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,2,128,1,float16,fp8,255,0.008352000266313553
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,2,128,1,float16,float16,511,0.017084799706935883
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,2,128,1,float16,fp8,511,0.008553600311279297
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,2,128,1,float16,fp8,1023,0.00846880003809929
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,2,128,1,float16,float16,2047,0.018743999302387238
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,2,128,1,float16,fp8,2047,0.010360000282526016
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,2,128,1,float16,float16,4095,0.018779200315475465
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,2,128,1,float16,fp8,4095,0.010476800054311753
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,2,128,1,float16,float16,8191,0.022763200104236603
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,2,128,1,float16,fp8,8191,0.012585599720478059
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,2,128,1,float16,float16,16383,0.02483679950237274
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,2,128,1,float16,fp8,16383,0.01459839940071106
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,2,128,1,float16,float16,32767,0.026966398954391478
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,2,128,1,float16,fp8,32767,0.016542400419712066
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,4,128,1,float16,float16,1,0.016678400337696075
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,4,128,1,float16,fp8,15,0.008452799916267396
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,4,128,1,float16,fp8,1,0.008473599702119828
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,4,128,1,float16,float16,3,0.018743999302387238
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,4,128,1,float16,fp8,3,0.008416000008583068
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,4,128,1,float16,float16,7,0.018646399676799773
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,4,128,1,float16,fp8,7,0.008478400111198426
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,4,128,1,float16,float16,15,0.016732800006866454
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,4,128,1,float16,float16,31,0.016590400040149687
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,4,128,1,float16,fp8,31,0.008436799794435502
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,4,128,1,float16,float16,63,0.018673600256443025
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,4,128,1,float16,fp8,63,0.00854559987783432
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,4,128,1,float16,float16,127,0.018580800294876097
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,4,128,1,float16,fp8,127,0.00843520015478134
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,4,128,1,float16,float16,255,0.01685280054807663
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,4,128,1,float16,fp8,255,0.008390399813652038
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,4,128,1,float16,float16,511,0.016657599806785585
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,4,128,1,float16,fp8,511,0.008558399975299835
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,4,128,1,float16,float16,1023,0.018555200099945067
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,4,128,1,float16,fp8,1023,0.008452799916267396
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,4,128,1,float16,float16,2047,0.02053920030593872
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,4,128,1,float16,fp8,2047,0.01037919968366623
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,4,128,1,float16,float16,4095,0.01879040002822876
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,4,128,1,float16,fp8,4095,0.010475199669599533
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,4,128,1,float16,float16,8191,0.02280319929122925
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,4,128,1,float16,fp8,8191,0.012459199875593185
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,1,128,1,float16,float16,3,0.01661760061979294
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,4,128,1,float16,float16,16383,0.024828800559043886
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,4,128,1,float16,fp8,16383,0.014480000734329224
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,4,128,1,float16,float16,32767,0.04460639953613281
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,8,4,128,1,float16,fp8,32767,0.016542400419712066
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,1,128,1,float16,float16,1,0.016680000722408293
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,1,128,1,float16,fp8,1,0.008347199857234954
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,1,128,1,float16,fp8,3,0.008340799808502197
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,1,128,1,float16,float16,7,0.01663520038127899
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,1,128,1,float16,fp8,7,0.008326400071382523
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,1,128,1,float16,float16,15,0.016708800196647645
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,1,128,1,float16,fp8,15,0.008369600027799606
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,1,128,1,float16,float16,31,0.01666080057621002
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,1,128,1,float16,fp8,31,0.008462399989366532
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,1,128,1,float16,fp8,255,0.008336000144481659
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,1,128,1,float16,float16,63,0.01666560024023056
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,1,128,1,float16,fp8,511,0.008432000130414962
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,1,128,1,float16,fp8,63,0.008316799998283386
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,1,128,1,float16,float16,127,0.01653439998626709
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,1,128,1,float16,fp8,127,0.00836160033941269
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,1,128,1,float16,float16,255,0.016651199758052827
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,1,128,1,float16,float16,511,0.016595199704170227
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,1,128,1,float16,float16,1023,0.016571199893951415
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,1,128,1,float16,fp8,1023,0.008451200276613235
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,1,128,1,float16,float16,2047,0.018673600256443025
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,1,128,1,float16,float16,16383,0.05821920037269592
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,1,128,1,float16,fp8,2047,0.009384000301361084
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,1,128,1,float16,float16,4095,0.02072799950838089
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,1,128,1,float16,fp8,4095,0.01048479974269867
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,1,128,1,float16,float16,8191,0.04108479917049408
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,1,128,1,float16,fp8,8191,0.014496000111103058
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,1,128,1,float16,fp8,16383,0.02882719933986664
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,1,128,1,float16,fp8,32767,0.04747360050678253
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,1,128,1,float16,float16,32767,0.09507520198822021
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,2,128,1,float16,float16,1,0.016659200191497803
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,2,128,1,float16,fp8,1,0.008425600081682205
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,2,128,1,float16,float16,3,0.017123199999332428
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,2,128,1,float16,fp8,3,0.008396799862384795
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,2,128,1,float16,float16,7,0.016683200001716615
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,2,128,1,float16,fp8,7,0.00836160033941269
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,2,128,1,float16,fp8,63,0.008366400003433227
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,2,128,1,float16,float16,15,0.017684799432754517
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,2,128,1,float16,fp8,15,0.008398400247097015
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,2,128,1,float16,float16,31,0.016820800304412842
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,2,128,1,float16,fp8,31,0.00857120007276535
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,2,128,1,float16,float16,63,0.01847680062055588
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,2,128,1,float16,float16,127,0.017044800519943237
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,2,128,1,float16,fp8,127,0.008633600175380706
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,2,128,1,float16,float16,255,0.016961599886417388
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,2,128,1,float16,fp8,255,0.008329600095748901
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,2,128,1,float16,float16,511,0.016737599670886994
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,2,128,1,float16,fp8,511,0.008457600325345992
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,2,128,1,float16,float16,1023,0.01860000044107437
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,2,128,1,float16,fp8,1023,0.010382399708032609
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,2,128,1,float16,float16,2047,0.02067359983921051
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,2,128,1,float16,fp8,2047,0.010396800190210342
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,2,128,1,float16,float16,4095,0.04131200015544891
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,2,128,1,float16,fp8,4095,0.01292639970779419
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,2,128,1,float16,float16,8191,0.05937439799308777
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,2,128,1,float16,fp8,8191,0.02887519896030426
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,2,128,1,float16,float16,16383,0.09606559872627259
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,2,128,1,float16,fp8,16383,0.04694559872150421
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,4,128,1,float16,float16,1,0.01674560010433197
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,2,128,1,float16,float16,32767,0.1665824055671692
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,4,128,1,float16,float16,15,0.01675360053777695
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,2,128,1,float16,fp8,32767,0.08289920091629029
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,4,128,1,float16,fp8,1,0.008350399881601333
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,4,128,1,float16,float16,3,0.01666879951953888
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,4,128,1,float16,fp8,3,0.00844319984316826
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,4,128,1,float16,float16,7,0.016612799465656282
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,4,128,1,float16,fp8,7,0.008326400071382523
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,4,128,1,float16,float16,31,0.016942399740219116
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,4,128,1,float16,fp8,15,0.008347199857234954
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,4,128,1,float16,fp8,31,0.00835999995470047
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,4,128,1,float16,float16,63,0.01687999963760376
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,4,128,1,float16,fp8,63,0.008454400300979614
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,4,128,1,float16,float16,127,0.01687040030956268
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,4,128,1,float16,fp8,127,0.008343999832868576
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,4,128,1,float16,float16,255,0.0166143998503685
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,4,128,1,float16,fp8,255,0.008367999643087386
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,4,128,1,float16,float16,511,0.018628799915313722
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,4,128,1,float16,fp8,511,0.008355200290679932
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,4,128,1,float16,float16,1023,0.020660799741744996
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,4,128,1,float16,fp8,1023,0.010398399829864503
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,4,128,1,float16,float16,2047,0.04068000018596649
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,4,128,1,float16,fp8,2047,0.012615999579429627
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,4,128,1,float16,float16,4095,0.05944960117340088
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,4,128,1,float16,fp8,4095,0.0275983989238739
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,4,128,1,float16,float16,8191,0.09442560076713562
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,4,128,1,float16,fp8,8191,0.04592959880828858
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,4,128,1,float16,float16,16383,0.16742559671401977
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,4,128,1,float16,fp8,32767,0.15743199586868287
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,4,128,1,float16,fp8,16383,0.0823535978794098
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,1,128,1,float16,float16,1,0.016649599373340606
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,8,4,128,1,float16,float16,32767,0.3123744010925293
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,1,128,1,float16,fp8,1,0.008665599673986436
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,1,128,1,float16,float16,3,0.01666560024023056
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,1,128,1,float16,fp8,3,0.008387199789285659
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,1,128,1,float16,float16,7,0.018585599958896637
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,1,128,1,float16,fp8,7,0.008531200140714646
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,1,128,1,float16,float16,15,0.016590400040149687
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,1,128,1,float16,fp8,15,0.008641599863767623
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,1,128,1,float16,float16,31,0.018619200587272643
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,1,128,1,float16,fp8,31,0.008548799902200699
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,1,128,1,float16,fp8,255,0.00840959995985031
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,1,128,1,float16,float16,63,0.0166143998503685
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,1,128,1,float16,float16,1023,0.018675200641155243
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,1,128,1,float16,fp8,63,0.008433599770069123
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,1,128,1,float16,float16,127,0.01863040030002594
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,1,128,1,float16,fp8,127,0.008416000008583068
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,1,128,1,float16,float16,255,0.01656160056591034
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,1,128,1,float16,float16,511,0.0186271995306015
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,1,128,1,float16,fp8,511,0.008352000266313553
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,1,128,1,float16,fp8,1023,0.008369600027799606
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,1,128,1,float16,float16,2047,0.01858399957418442
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,1,128,1,float16,fp8,2047,0.008436799794435502
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,1,128,1,float16,float16,4095,0.02062560021877289
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,1,128,1,float16,fp8,4095,0.010478399693965912
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,1,128,1,float16,float16,8191,0.020827199518680572
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,1,128,1,float16,fp8,8191,0.012457600235939026
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,1,128,1,float16,float16,16383,0.02466080039739609
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,1,128,1,float16,fp8,16383,0.014611199498176575
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,1,128,1,float16,float16,32767,0.027014398574829103
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,1,128,1,float16,fp8,32767,0.016620799899101257
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,2,128,1,float16,float16,1,0.01868479996919632
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,2,128,1,float16,fp8,1,0.008353599905967712
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,2,128,1,float16,float16,3,0.01664319932460785
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,2,128,1,float16,fp8,3,0.008441600203514098
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,2,128,1,float16,float16,7,0.018063999712467194
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,2,128,1,float16,fp8,7,0.008483199775218964
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,2,128,1,float16,float16,15,0.01865759938955307
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,2,128,1,float16,fp8,127,0.008568000048398972
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,2,128,1,float16,fp8,15,0.00856959968805313
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,2,128,1,float16,float16,31,0.018603199720382692
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,2,128,1,float16,fp8,31,0.008366400003433227
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,2,128,1,float16,float16,63,0.016622400283813475
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,2,128,1,float16,fp8,63,0.008367999643087386
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,2,128,1,float16,float16,127,0.018564799427986146
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,2,128,1,float16,float16,255,0.018606400489807128
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,2,128,1,float16,fp8,255,0.00835999995470047
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,2,128,1,float16,float16,511,0.017846399545669557
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,2,128,1,float16,fp8,511,0.008420799672603608
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,2,128,1,float16,float16,1023,0.018559999763965607
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,2,128,1,float16,fp8,1023,0.008420799672603608
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,2,128,1,float16,float16,2047,0.018764799833297728
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,2,128,1,float16,fp8,2047,0.010451199859380722
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,2,128,1,float16,float16,4095,0.020612800121307374
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,2,128,1,float16,fp8,4095,0.0104032002389431
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,2,128,1,float16,float16,8191,0.021036800742149354
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,2,128,1,float16,fp8,8191,0.012544000148773193
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,2,128,1,float16,float16,16383,0.024796800315380098
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,4,128,1,float16,fp8,3,0.008452799916267396
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,2,128,1,float16,fp8,16383,0.013262400031089782
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,2,128,1,float16,float16,32767,0.04520800113677979
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,2,128,1,float16,fp8,32767,0.016564799845218657
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,4,128,1,float16,float16,1,0.018753600120544434
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,4,128,1,float16,fp8,1,0.008462399989366532
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,4,128,1,float16,float16,3,0.01666560024023056
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,4,128,1,float16,float16,7,0.018588800728321076
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,4,128,1,float16,fp8,7,0.008561599999666214
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,4,128,1,float16,float16,15,0.01655679941177368
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,4,128,1,float16,fp8,15,0.008580800145864487
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,4,128,1,float16,float16,31,0.016752000153064727
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,4,128,1,float16,fp8,31,0.008551999926567078
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,4,128,1,float16,float16,63,0.016734400391578676
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,4,128,1,float16,fp8,63,0.008446399867534638
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,4,128,1,float16,float16,127,0.018607999384403228
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,4,128,1,float16,fp8,127,0.008459199965000153
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,4,128,1,float16,float16,255,0.01664000004529953
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,4,128,1,float16,fp8,255,0.008590400218963623
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,4,128,1,float16,float16,511,0.017059199512004852
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,4,128,1,float16,fp8,511,0.00843999981880188
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,4,128,1,float16,float16,1023,0.01866399943828583
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,4,128,1,float16,fp8,1023,0.00852160006761551
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,4,128,1,float16,float16,2047,0.018731200695037843
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,4,128,1,float16,fp8,2047,0.01040479987859726
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,4,128,1,float16,float16,4095,0.020721599459648132
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,4,128,1,float16,fp8,4095,0.010704000294208527
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,4,128,1,float16,float16,8191,0.022700800001621245
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,4,128,1,float16,fp8,8191,0.012516799569129943
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,4,128,1,float16,float16,16383,0.04328640103340149
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,4,128,1,float16,fp8,16383,0.014691199362277984
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,4,128,1,float16,float16,32767,0.06168799996376038
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,8,4,128,1,float16,fp8,32767,0.030273601412773132
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,1,128,1,float16,float16,1,0.016628800332546233
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,1,128,1,float16,float16,15,0.016657599806785585
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,1,128,1,float16,fp8,1,0.00836160033941269
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,1,128,1,float16,fp8,15,0.008352000266313553
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,1,128,1,float16,float16,3,0.017134399712085725
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,1,128,1,float16,fp8,3,0.00833280012011528
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,1,128,1,float16,float16,7,0.01661120057106018
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,1,128,1,float16,fp8,7,0.008284799754619598
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,1,128,1,float16,float16,31,0.0174687996506691
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,1,128,1,float16,fp8,31,0.008339200168848038
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,1,128,1,float16,float16,63,0.017156800627708434
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,1,128,1,float16,fp8,63,0.008408000320196151
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,1,128,1,float16,float16,127,0.01664000004529953
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,1,128,1,float16,float16,255,0.017056000232696534
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,1,128,1,float16,fp8,127,0.008316799998283386
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,1,128,1,float16,fp8,255,0.008393599838018417
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,1,128,1,float16,float16,511,0.018628799915313722
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,1,128,1,float16,fp8,511,0.008542399853467941
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,1,128,1,float16,float16,1023,0.020755200088024138
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,1,128,1,float16,fp8,1023,0.010364799946546554
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,1,128,1,float16,float16,8191,0.09490079879760742
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,1,128,1,float16,fp8,8191,0.045614400506019594
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,1,128,1,float16,float16,2047,0.04100959897041321
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,1,128,1,float16,fp8,2047,0.012558400630950928
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,1,128,1,float16,float16,4095,0.05958080291748047
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,1,128,1,float16,fp8,4095,0.02828960120677948
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,2,128,1,float16,float16,1,0.018649600446224213
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,2,128,1,float16,fp8,1,0.008535999804735184
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,1,128,1,float16,float16,16383,0.1670624017715454
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,2,128,1,float16,float16,15,0.0186831995844841
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,1,128,1,float16,fp8,16383,0.08284800052642823
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,2,128,1,float16,float16,31,0.018739199638366698
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,2,128,1,float16,float16,3,0.018641600012779237
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,2,128,1,float16,fp8,3,0.008414400368928909
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,2,128,1,float16,float16,7,0.018670399487018586
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,2,128,1,float16,fp8,7,0.008539199829101562
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,2,128,1,float16,fp8,15,0.009011200070381165
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,2,128,1,float16,fp8,31,0.00846880003809929
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,2,128,1,float16,float16,511,0.021169599890708924
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,2,128,1,float16,float16,63,0.018670399487018586
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,2,128,1,float16,fp8,63,0.008937600255012512
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,2,128,1,float16,float16,127,0.018691200017929076
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,2,128,1,float16,fp8,127,0.008376000076532364
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,2,128,1,float16,float16,255,0.01868479996919632
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,2,128,1,float16,fp8,255,0.008667200058698653
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,2,128,1,float16,fp8,511,0.010447999835014344
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,2,128,1,float16,float16,1023,0.04115520119667053
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,2,128,1,float16,fp8,1023,0.01385280042886734
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,2,128,1,float16,float16,2047,0.05956320166587829
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,2,128,1,float16,fp8,2047,0.02892960011959076
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,2,128,1,float16,float16,4095,0.09550079703330994
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,2,128,1,float16,fp8,4095,0.04743199944496155
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,2,128,1,float16,float16,8191,0.16784800291061402
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,2,128,1,float16,fp8,8191,0.0850928008556366
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,4,128,1,float16,float16,1,0.020721599459648132
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,4,128,1,float16,fp8,1,0.01048479974269867
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,4,128,1,float16,float16,15,0.021143999695777894
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,2,128,1,float16,float16,16383,0.31261439323425294
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,2,128,1,float16,fp8,16383,0.15998879671096802
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,4,128,1,float16,float16,3,0.020664000511169435
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,4,128,1,float16,fp8,3,0.010599999874830245
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,4,128,1,float16,float16,7,0.021049599349498748
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,4,128,1,float16,fp8,7,0.010603199899196624
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,4,128,1,float16,fp8,15,0.010491199791431427
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,4,128,1,float16,float16,31,0.02078080028295517
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,4,128,1,float16,fp8,31,0.010556799918413162
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,4,128,1,float16,float16,63,0.021143999695777894
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,4,128,1,float16,fp8,63,0.010440000146627427
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,4,128,1,float16,float16,127,0.022668799757957457
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,4,128,1,float16,fp8,127,0.011070399731397628
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,4,128,1,float16,float16,255,0.020742399990558623
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,4,128,1,float16,fp8,255,0.010438399761915207
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,4,128,1,float16,float16,511,0.041142401099205014
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,4,128,1,float16,fp8,511,0.01449279934167862
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,4,128,1,float16,float16,1023,0.061136001348495485
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,4,128,1,float16,fp8,1023,0.02889440059661865
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,4,128,1,float16,float16,2047,0.09790880084037781
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,4,128,1,float16,fp8,2047,0.04737440049648285
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,4,128,1,float16,float16,8191,0.320196795463562
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,4,128,1,float16,float16,4095,0.1715775966644287
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,4,128,1,float16,fp8,4095,0.08413919806480408
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,4,128,1,float16,fp8,8191,0.15835039615631102
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,1,128,1,float16,float16,1,0.01868640035390854
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,1,128,1,float16,fp8,1,0.008430399745702744
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,1,128,1,float16,float16,3,0.018779200315475465
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,1,128,1,float16,fp8,3,0.008497600257396699
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,1,128,1,float16,float16,7,0.019684800505638124
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,1,128,1,float16,fp8,7,0.008870399743318557
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,4,128,1,float16,fp8,16383,0.3042815923690796
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,8,4,128,1,float16,float16,16383,0.6178080081939697
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,1,128,1,float16,float16,15,0.019222399592399596
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,1,128,1,float16,fp8,15,0.008900800347328186
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,1,128,1,float16,float16,31,0.018972800672054292
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,1,128,1,float16,fp8,31,0.008771199733018875
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,1,128,1,float16,float16,63,0.01913599967956543
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,1,128,1,float16,fp8,63,0.008958400040864945
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,1,128,1,float16,float16,127,0.020686399936676026
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,1,128,1,float16,float16,255,0.019649599492549897
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,1,128,1,float16,fp8,127,0.008460800349712371
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,1,128,1,float16,fp8,255,0.008779200166463852
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,1,128,1,float16,float16,511,0.02282080054283142
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,1,128,1,float16,fp8,511,0.010454399883747101
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,1,128,1,float16,float16,1023,0.04298399984836578
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,1,128,1,float16,fp8,1023,0.014076800644397735
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,1,128,1,float16,float16,2047,0.061617600917816165
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,1,128,1,float16,fp8,2047,0.02858240008354187
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,1,128,1,float16,float16,4095,0.09654880166053773
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,1,128,1,float16,fp8,4095,0.04748159945011139
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,2,128,1,float16,float16,1,0.02272319942712784
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,2,128,1,float16,fp8,1,0.010620799660682679
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,2,128,1,float16,float16,3,0.022655999660491942
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,2,128,1,float16,fp8,3,0.010622400045394897
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,2,128,1,float16,float16,7,0.022761599719524385
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,2,128,1,float16,fp8,7,0.010599999874830245
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,2,128,1,float16,float16,15,0.022681599855422972
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,2,128,1,float16,fp8,15,0.01077279970049858
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,2,128,1,float16,fp8,127,0.01053439974784851
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,2,128,1,float16,float16,31,0.022708800435066224
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,2,128,1,float16,fp8,31,0.010531199723482132
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,2,128,1,float16,float16,63,0.022780799865722658
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,2,128,1,float16,fp8,63,0.01040479987859726
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,2,128,1,float16,float16,127,0.022070400416851044
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,2,128,1,float16,float16,255,0.02285439968109131
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,2,128,1,float16,fp8,255,0.010943999886512757
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,2,128,1,float16,float16,511,0.04192639887332916
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,2,128,1,float16,fp8,511,0.014585599303245544
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,2,128,1,float16,float16,1023,0.061710399389266965
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,2,128,1,float16,fp8,1023,0.02919520139694214
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,2,128,1,float16,fp8,2047,0.04768320024013519
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,2,128,1,float16,float16,2047,0.0990127980709076
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,4,128,1,float16,float16,1,0.026807999610900878
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,2,128,1,float16,float16,4095,0.17136160135269166
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,2,128,1,float16,fp8,4095,0.08493120074272156
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,4,128,1,float16,fp8,1,0.014582400023937226
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,4,128,1,float16,float16,3,0.026894399523735048
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,4,128,1,float16,fp8,3,0.014571200311183929
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,4,128,1,float16,float16,7,0.026833599805831908
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,4,128,1,float16,fp8,7,0.014609600603580474
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,4,128,1,float16,float16,127,0.02691200077533722
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,4,128,1,float16,float16,15,0.026782399415969847
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,4,128,1,float16,fp8,15,0.01459999978542328
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,4,128,1,float16,float16,31,0.026881599426269533
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,4,128,1,float16,fp8,31,0.014547200500965118
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,4,128,1,float16,float16,63,0.026796799898147584
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,4,128,1,float16,fp8,63,0.014590400457382201
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,4,128,1,float16,fp8,127,0.014561599493026734
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,4,128,1,float16,float16,255,0.04134239852428436
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,4,128,1,float16,fp8,255,0.014611199498176575
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,4,128,1,float16,float16,511,0.060969597101211546
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,4,128,1,float16,fp8,511,0.029014399647712706
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,4,128,1,float16,float16,1023,0.09707199931144714
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,4,128,1,float16,fp8,1023,0.04782719910144806
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,4,128,1,float16,float16,2047,0.1700111985206604
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,4,128,1,float16,fp8,2047,0.08509600162506104
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,1,128,1,float16,float16,1,0.02279040068387985
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,1,128,1,float16,fp8,1,0.012483199685811996
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,1,128,1,float16,float16,3,0.022844800353050233
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,4,128,1,float16,fp8,4095,0.15976159572601317
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,1,128,1,float16,fp8,3,0.012455999851226807
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,8,4,128,1,float16,float16,4095,0.31817920207977296
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,1,128,1,float16,float16,63,0.024307200312614442
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,1,128,1,float16,float16,7,0.02276639938354492
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,1,128,1,float16,fp8,7,0.011214400082826615
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,1,128,1,float16,float16,15,0.02279520034790039
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,1,128,1,float16,fp8,15,0.012449599802494049
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,1,128,1,float16,float16,31,0.023451200127601622
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,1,128,1,float16,fp8,31,0.012476799637079239
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,1,128,1,float16,float16,127,0.024648000299930573
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,1,128,1,float16,fp8,127,0.01141439974308014
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,1,128,1,float16,float16,255,0.02327840030193329
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,1,128,1,float16,fp8,63,0.01244800016283989
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,1,128,1,float16,fp8,255,0.01244800016283989
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,1,128,1,float16,float16,511,0.04437919855117798
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,1,128,1,float16,fp8,511,0.01449120044708252
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,1,128,1,float16,float16,1023,0.06282079815864564
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,1,128,1,float16,fp8,1023,0.029355201125144958
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,1,128,1,float16,float16,2047,0.10049599409103394
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,1,128,1,float16,fp8,2047,0.047932800650596616
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,2,128,1,float16,float16,1,0.028825598955154418
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,2,128,1,float16,fp8,1,0.014921599626541137
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,2,128,1,float16,float16,3,0.028476798534393312
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,2,128,1,float16,fp8,3,0.015116800367832185
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,2,128,1,float16,float16,7,0.02892799973487854
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,2,128,1,float16,fp8,63,0.014912000298500061
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,2,128,1,float16,float16,127,0.028788799047470094
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,2,128,1,float16,fp8,7,0.01467680037021637
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,2,128,1,float16,float16,255,0.04507359862327576
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,2,128,1,float16,float16,15,0.028799998760223388
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,2,128,1,float16,fp8,15,0.015140800178050995
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,2,128,1,float16,float16,31,0.02884320020675659
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,2,128,1,float16,fp8,31,0.014726400375366211
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,2,128,1,float16,float16,63,0.028937599062919615
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,2,128,1,float16,fp8,127,0.015012800693511963
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,2,128,1,float16,fp8,255,0.014664000272750855
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,2,128,1,float16,float16,511,0.06259520053863525
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,2,128,1,float16,fp8,511,0.028884801268577575
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,2,128,1,float16,float16,1023,0.09968799948692322
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,2,128,1,float16,fp8,1023,0.047947201132774356
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,2,128,1,float16,float16,2047,0.17188479900360107
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,4,128,1,float16,float16,1,0.03711200058460236
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,2,128,1,float16,fp8,2047,0.08585280179977417
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,4,128,1,float16,fp8,7,0.02311359941959381
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,4,128,1,float16,fp8,1,0.02280319929122925
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,4,128,1,float16,float16,3,0.03892160058021545
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,4,128,1,float16,fp8,3,0.02280319929122925
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,4,128,1,float16,float16,7,0.0372655987739563
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,4,128,1,float16,float16,15,0.03745599985122681
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,4,128,1,float16,fp8,15,0.022784000635147093
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,4,128,1,float16,float16,31,0.03909119963645935
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,4,128,1,float16,fp8,31,0.023057599365711213
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,4,128,1,float16,float16,255,0.06279039978981019
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,4,128,1,float16,float16,63,0.038252800703048706
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,4,128,1,float16,fp8,63,0.023185600340366364
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,4,128,1,float16,float16,127,0.046726399660110475
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,4,128,1,float16,fp8,127,0.022947199642658234
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,4,128,1,float16,float16,1023,0.17336479425430298
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,4,128,1,float16,fp8,255,0.029967999458312987
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,4,128,1,float16,float16,511,0.1003216028213501
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,4,128,1,float16,fp8,511,0.047751998901367186
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,1,128,1,float16,float16,3,0.018619200587272643
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,4,128,1,float16,float16,2047,0.31801440715789797
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,4,128,1,float16,fp8,1023,0.08443040251731873
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,1,128,1,float16,float16,1,0.018566399812698364
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,1,128,1,float16,fp8,1,0.00849440023303032
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,8,4,128,1,float16,fp8,2047,0.1583184003829956
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,1,128,1,float16,float16,31,0.018702399730682374
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,1,128,1,float16,fp8,3,0.008470399677753449
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,1,128,1,float16,float16,63,0.01858240067958832
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,1,128,1,float16,float16,7,0.018638400733470915
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,1,128,1,float16,fp8,7,0.008352000266313553
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,1,128,1,float16,float16,15,0.016676799952983858
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,1,128,1,float16,fp8,15,0.008422400057315826
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,1,128,1,float16,fp8,31,0.008348800241947174
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,1,128,1,float16,fp8,63,0.008664000034332275
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,1,128,1,float16,float16,127,0.01685280054807663
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,1,128,1,float16,fp8,127,0.008483199775218964
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,1,128,1,float16,float16,255,0.018585599958896637
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,1,128,1,float16,fp8,255,0.008486399799585343
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,1,128,1,float16,float16,4095,0.020664000511169435
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,1,128,1,float16,float16,511,0.01712000072002411
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,1,128,1,float16,fp8,511,0.008449599891901017
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,1,128,1,float16,float16,1023,0.018785600364208222
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,1,128,1,float16,fp8,1023,0.008560000360012055
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,1,128,1,float16,float16,2047,0.018620799481868743
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,1,128,1,float16,fp8,2047,0.010446400195360184
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,1,128,1,float16,fp8,4095,0.01048320010304451
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,1,128,1,float16,float16,8191,0.022844800353050233
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,1,128,1,float16,fp8,8191,0.012587200105190276
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,1,128,1,float16,float16,16383,0.024779200553894043
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,1,128,1,float16,fp8,16383,0.014478400349617004
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,1,128,1,float16,float16,32767,0.04527519941329956
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,1,128,1,float16,fp8,32767,0.016568000614643096
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,2,128,1,float16,float16,1,0.018572799861431122
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,2,128,1,float16,fp8,1,0.00840959995985031
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,2,128,1,float16,float16,3,0.01846559941768646
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,2,128,1,float16,fp8,3,0.00846719965338707
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,2,128,1,float16,float16,7,0.01659200042486191
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,2,128,1,float16,fp8,7,0.008528000116348267
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,2,128,1,float16,float16,127,0.01855040043592453
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,2,128,1,float16,float16,15,0.016974399983882903
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,2,128,1,float16,fp8,15,0.008406399935483932
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,2,128,1,float16,float16,31,0.017660799622535705
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,2,128,1,float16,fp8,31,0.008452799916267396
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,2,128,1,float16,float16,63,0.01857440024614334
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,2,128,1,float16,fp8,63,0.008420799672603608
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,2,128,1,float16,fp8,127,0.00843840017914772
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,2,128,1,float16,float16,255,0.018593600392341612
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,2,128,1,float16,fp8,255,0.008473599702119828
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,2,128,1,float16,float16,511,0.01679359972476959
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,2,128,1,float16,fp8,511,0.008457600325345992
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,2,128,1,float16,float16,1023,0.01868640035390854
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,2,128,1,float16,fp8,1023,0.008881600201129913
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,2,128,1,float16,float16,2047,0.018667200207710268
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,2,128,1,float16,fp8,2047,0.0104032002389431
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,2,128,1,float16,float16,4095,0.02078080028295517
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,2,128,1,float16,fp8,4095,0.01101600006222725
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,2,128,1,float16,float16,8191,0.02273920029401779
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,2,128,1,float16,fp8,8191,0.012457600235939026
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,2,128,1,float16,float16,16383,0.0433104008436203
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,2,128,1,float16,fp8,16383,0.01528480052947998
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,2,128,1,float16,float16,32767,0.061627197265625
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,2,128,1,float16,fp8,32767,0.030929601192474364
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,4,128,1,float16,float16,1,0.016680000722408293
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,4,128,1,float16,fp8,1,0.00838399976491928
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,4,128,1,float16,float16,3,0.016545599699020384
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,4,128,1,float16,fp8,3,0.008324799686670303
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,4,128,1,float16,float16,7,0.016547200083732606
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,4,128,1,float16,fp8,7,0.008419200032949447
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,4,128,1,float16,float16,15,0.016774399578571318
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,4,128,1,float16,fp8,15,0.008323200047016144
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,4,128,1,float16,float16,31,0.01655520051717758
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,4,128,1,float16,fp8,255,0.008374399691820144
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,4,128,1,float16,fp8,31,0.008371199667453765
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,4,128,1,float16,float16,63,0.016531200706958772
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,4,128,1,float16,fp8,511,0.008433599770069123
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,4,128,1,float16,fp8,63,0.008355200290679932
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,4,128,1,float16,float16,127,0.016627199947834015
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,4,128,1,float16,fp8,127,0.008363199979066848
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,4,128,1,float16,float16,255,0.016654400527477263
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,4,128,1,float16,float16,511,0.016655999422073364
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,4,128,1,float16,float16,1023,0.01655520051717758
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,4,128,1,float16,fp8,1023,0.008462399989366532
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,4,128,1,float16,float16,2047,0.018699200451374055
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,4,128,1,float16,fp8,2047,0.009408000111579894
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,4,128,1,float16,float16,4095,0.020703999698162077
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,4,128,1,float16,fp8,4095,0.010609599947929382
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,4,128,1,float16,float16,8191,0.04108479917049408
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,4,128,1,float16,fp8,8191,0.013726399838924408
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,4,128,1,float16,float16,16383,0.05971840023994446
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,4,128,1,float16,fp8,16383,0.02890079915523529
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,4,128,1,float16,float16,32767,0.09497439861297607
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,1,128,1,float16,float16,1,0.027057600021362305
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,8,4,128,1,float16,fp8,32767,0.047310400009155276
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,1,128,1,float16,fp8,1,0.016548800468444824
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,1,128,1,float16,float16,3,0.027806401252746582
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,1,128,1,float16,fp8,3,0.016553600132465363
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,1,128,1,float16,float16,7,0.02892960011959076
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,1,128,1,float16,fp8,7,0.016523200273513793
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,1,128,1,float16,float16,15,0.028907200694084166
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,1,128,1,float16,fp8,15,0.016577599942684172
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,1,128,1,float16,float16,31,0.028833600878715514
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,1,128,1,float16,fp8,31,0.01658080071210861
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,1,128,1,float16,float16,63,0.02884640097618103
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,1,128,1,float16,fp8,63,0.016683200001716615
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,1,128,1,float16,float16,127,0.02892639935016632
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,1,128,1,float16,fp8,127,0.01658080071210861
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,1,128,1,float16,float16,1023,0.09950879812240601
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,1,128,1,float16,float16,255,0.043875199556350705
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,1,128,1,float16,fp8,255,0.0165583997964859
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,1,128,1,float16,float16,511,0.06362720131874085
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,1,128,1,float16,fp8,511,0.029912000894546507
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,1,128,1,float16,fp8,1023,0.048814401030540466
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,2,128,1,float16,float16,1,0.037201601266860965
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,2,128,1,float16,fp8,1,0.023825600743293762
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,2,128,1,float16,float16,3,0.03840160071849823
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,2,128,1,float16,fp8,3,0.024695999920368195
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,2,128,1,float16,float16,7,0.037254399061203
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,2,128,1,float16,fp8,7,0.02473919987678528
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,2,128,1,float16,float16,15,0.03866559863090515
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,2,128,1,float16,fp8,15,0.02420320063829422
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,2,128,1,float16,float16,31,0.03721120059490204
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,2,128,1,float16,fp8,31,0.02393600046634674
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,2,128,1,float16,float16,63,0.039150398969650266
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,2,128,1,float16,fp8,63,0.024849599599838255
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,2,128,1,float16,float16,127,0.04729439914226532
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,2,128,1,float16,fp8,127,0.02423679977655411
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,2,128,1,float16,float16,1023,0.17214560508728027
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,2,128,1,float16,float16,255,0.0626528024673462
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,2,128,1,float16,fp8,255,0.030870398879051207
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,2,128,1,float16,float16,511,0.09937279820442199
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,2,128,1,float16,fp8,511,0.048363199830055235
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,4,128,1,float16,float16,1,0.05756800174713135
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,2,128,1,float16,fp8,1023,0.08626719713211059
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,4,128,1,float16,fp8,1,0.03943839967250824
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,4,128,1,float16,float16,3,0.057524800300598145
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,4,128,1,float16,fp8,3,0.03930560052394867
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,4,128,1,float16,float16,7,0.05750880241394043
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,4,128,1,float16,fp8,7,0.039299198985099794
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,4,128,1,float16,float16,15,0.05755680203437805
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,4,128,1,float16,float16,127,0.06748800277709961
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,4,128,1,float16,fp8,15,0.03979519903659821
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,4,128,1,float16,float16,31,0.061643201112747195
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,4,128,1,float16,fp8,31,0.0393775999546051
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,4,128,1,float16,float16,63,0.0645632028579712
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,4,128,1,float16,fp8,63,0.03930720090866089
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,4,128,1,float16,fp8,127,0.04121760129928589
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,4,128,1,float16,float16,255,0.10050879716873169
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,4,128,1,float16,fp8,255,0.04972319900989532
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,4,128,1,float16,float16,511,0.17377439737319947
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,4,128,1,float16,fp8,511,0.08575680255889892
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,1,128,1,float16,float16,1,0.037529599666595456
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,1,128,1,float16,fp8,1,0.024851199984550477
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,4,128,1,float16,float16,1023,0.3205296039581299
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,8,4,128,1,float16,fp8,1023,0.1590127944946289
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,1,128,1,float16,float16,3,0.03721120059490204
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,1,128,1,float16,float16,31,0.038145598769187924
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,1,128,1,float16,fp8,31,0.024769599735736846
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,1,128,1,float16,fp8,3,0.024884800612926482
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,1,128,1,float16,float16,7,0.03715839982032776
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,1,128,1,float16,fp8,7,0.02480800002813339
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,1,128,1,float16,float16,15,0.038689601421356204
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,1,128,1,float16,fp8,15,0.024881599843502043
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,1,128,1,float16,float16,63,0.04111199975013733
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,1,128,1,float16,fp8,63,0.0248879998922348
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,1,128,1,float16,float16,127,0.04726879894733429
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,1,128,1,float16,fp8,127,0.02496480047702789
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,1,128,1,float16,float16,255,0.0640175998210907
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,1,128,1,float16,fp8,255,0.031119999289512635
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,1,128,1,float16,float16,511,0.10081759691238404
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,1,128,1,float16,fp8,511,0.049675199389457705
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,2,128,1,float16,float16,1,0.05764319896697998
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,2,128,1,float16,fp8,1,0.04121600091457367
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,2,128,1,float16,float16,3,0.057942402362823484
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,2,128,1,float16,fp8,3,0.041065600514411923
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,2,128,1,float16,float16,7,0.05762879848480225
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,2,128,1,float16,fp8,7,0.041116800904273984
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,2,128,1,float16,float16,15,0.05771200060844421
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,2,128,1,float16,fp8,15,0.04133279919624329
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,2,128,1,float16,float16,31,0.06180800199508667
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,2,128,1,float16,fp8,31,0.04121440052986145
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,2,128,1,float16,float16,63,0.06371840238571166
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,2,128,1,float16,fp8,63,0.0411215990781784
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,2,128,1,float16,float16,127,0.06829599738121032
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,2,128,1,float16,fp8,127,0.04329439997673035
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,2,128,1,float16,float16,255,0.1013983964920044
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,2,128,1,float16,fp8,255,0.05196639895439148
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,2,128,1,float16,float16,511,0.17418880462646485
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,2,128,1,float16,fp8,511,0.08728479743003845
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,4,128,1,float16,float16,1,0.10062400102615357
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,4,128,1,float16,fp8,1,0.07213119864463806
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,4,128,1,float16,float16,15,0.10627360343933105
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,4,128,1,float16,float16,3,0.10041760206222534
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,4,128,1,float16,fp8,3,0.07222880125045776
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,4,128,1,float16,float16,7,0.09991679787635803
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,4,128,1,float16,fp8,7,0.07245759963989258
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,4,128,1,float16,fp8,15,0.0719648003578186
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,4,128,1,float16,float16,31,0.10877439975738526
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,4,128,1,float16,fp8,31,0.07338079810142517
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,4,128,1,float16,float16,63,0.10884640216827393
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,4,128,1,float16,fp8,63,0.07394239902496338
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,4,128,1,float16,float16,127,0.11472959518432617
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,4,128,1,float16,fp8,127,0.07405920028686523
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,4,128,1,float16,float16,255,0.1819535970687866
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,4,128,1,float16,fp8,255,0.09070079922676086
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,1,128,1,float16,float16,1,0.016633599996566772
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,1,128,1,float16,fp8,1,0.008460800349712371
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,1,128,1,float16,float16,15,0.016711999475955964
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,4,128,1,float16,float16,511,0.3263887882232666
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,1,128,1,float16,float16,3,0.018615999817848207
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,8,4,128,1,float16,fp8,511,0.16177599430084227
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,1,128,1,float16,float16,63,0.018750399351119995
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,1,128,1,float16,fp8,3,0.008444800227880477
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,1,128,1,float16,float16,7,0.017433600127696992
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,1,128,1,float16,fp8,7,0.0083856001496315
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,1,128,1,float16,fp8,15,0.008544000238180161
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,1,128,1,float16,fp8,255,0.008376000076532364
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,1,128,1,float16,float16,31,0.017403200268745422
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,1,128,1,float16,fp8,31,0.008347199857234954
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,1,128,1,float16,float16,127,0.018603199720382692
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,1,128,1,float16,fp8,63,0.008587200194597244
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,1,128,1,float16,fp8,127,0.008526399731636047
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,1,128,1,float16,float16,255,0.018475200235843658
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,1,128,1,float16,fp8,511,0.008321599662303924
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,1,128,1,float16,float16,511,0.016646400094032288
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,1,128,1,float16,float16,1023,0.018611200153827667
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,1,128,1,float16,fp8,1023,0.008497600257396699
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,1,128,1,float16,float16,2047,0.01873439997434616
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,1,128,1,float16,fp8,2047,0.010398399829864503
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,1,128,1,float16,float16,4095,0.021113599836826324
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,1,128,1,float16,fp8,4095,0.010480000078678131
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,1,128,1,float16,float16,8191,0.022896000742912294
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,1,128,1,float16,fp8,8191,0.012673600018024445
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,2,128,1,float16,float16,3,0.016967999935150146
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,1,128,1,float16,float16,16383,0.04488799870014191
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,1,128,1,float16,fp8,16383,0.01607840061187744
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,1,128,1,float16,float16,32767,0.06191040277481079
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,1,128,1,float16,fp8,32767,0.030921599268913268
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,2,128,1,float16,float16,1,0.016630400717258454
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,2,128,1,float16,fp8,1,0.008380799740552902
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,2,128,1,float16,fp8,3,0.008430399745702744
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,2,128,1,float16,float16,7,0.01650719940662384
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,2,128,1,float16,fp8,7,0.00835679993033409
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,2,128,1,float16,float16,127,0.01660960018634796
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,2,128,1,float16,fp8,127,0.008377599716186523
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,2,128,1,float16,float16,15,0.016726399958133697
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,2,128,1,float16,fp8,15,0.008345600217580795
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,2,128,1,float16,float16,31,0.01666080057621002
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,2,128,1,float16,fp8,31,0.008460800349712371
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,2,128,1,float16,float16,63,0.016964800655841827
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,2,128,1,float16,fp8,63,0.00840959995985031
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,2,128,1,float16,float16,255,0.01655520051717758
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,2,128,1,float16,fp8,255,0.008380799740552902
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,2,128,1,float16,float16,511,0.01666560024023056
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,2,128,1,float16,fp8,511,0.008353599905967712
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,2,128,1,float16,float16,1023,0.01668799966573715
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,2,128,1,float16,fp8,1023,0.008339200168848038
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,2,128,1,float16,float16,2047,0.01870719939470291
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,2,128,1,float16,fp8,2047,0.009457600116729737
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,2,128,1,float16,float16,4095,0.020644800364971162
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,2,128,1,float16,float16,32767,0.09439679980278015
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,2,128,1,float16,fp8,32767,0.04739519953727722
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,2,128,1,float16,fp8,4095,0.010417599976062775
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,4,128,1,float16,float16,3,0.016740800440311433
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,2,128,1,float16,float16,8191,0.04048320055007935
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,4,128,1,float16,fp8,3,0.00849440023303032
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,2,128,1,float16,fp8,8191,0.013892799615859985
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,2,128,1,float16,float16,16383,0.05921120047569275
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,2,128,1,float16,fp8,16383,0.028939199447631837
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,4,128,1,float16,float16,1,0.017579199373722078
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,4,128,1,float16,fp8,1,0.008339200168848038
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,4,128,1,float16,float16,7,0.016595199704170227
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,4,128,1,float16,fp8,63,0.008472000062465668
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,4,128,1,float16,fp8,7,0.008404800295829773
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,4,128,1,float16,float16,15,0.017774400115013123
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,4,128,1,float16,fp8,127,0.008432000130414962
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,4,128,1,float16,fp8,15,0.008369600027799606
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,4,128,1,float16,float16,31,0.01675039976835251
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,4,128,1,float16,fp8,31,0.008326400071382523
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,4,128,1,float16,float16,63,0.01685599982738495
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,4,128,1,float16,float16,127,0.017243200540542604
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,4,128,1,float16,float16,255,0.01663679927587509
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,4,128,1,float16,fp8,255,0.008366400003433227
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,4,128,1,float16,float16,511,0.017972800135612487
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,4,128,1,float16,fp8,511,0.008345600217580795
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,4,128,1,float16,float16,1023,0.018680000305175783
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,4,128,1,float16,fp8,1023,0.008551999926567078
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,4,128,1,float16,float16,2047,0.020871999859809875
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,4,128,1,float16,fp8,2047,0.010401599854230881
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,4,128,1,float16,float16,4095,0.04107680022716522
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,4,128,1,float16,fp8,4095,0.012639999389648438
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,4,128,1,float16,float16,8191,0.0591808021068573
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,4,128,1,float16,float16,32767,0.1674831986427307
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,4,128,1,float16,fp8,8191,0.028798401355743408
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,4,128,1,float16,float16,16383,0.09464799761772155
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,4,128,1,float16,fp8,16383,0.046167999505996704
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,1,128,1,float16,float16,1,0.017348800599575043
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,8,4,128,1,float16,fp8,32767,0.08312159776687622
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,1,128,1,float16,fp8,1,0.008436799794435502
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,1,128,1,float16,float16,3,0.017294399440288544
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,1,128,1,float16,fp8,3,0.008347199857234954
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,1,128,1,float16,float16,7,0.018615999817848207
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,1,128,1,float16,fp8,7,0.008504000306129456
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,1,128,1,float16,float16,15,0.018423999845981597
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,1,128,1,float16,fp8,15,0.008350399881601333
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,1,128,1,float16,float16,31,0.016667200624942778
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,1,128,1,float16,fp8,31,0.00844319984316826
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,1,128,1,float16,float16,63,0.018563200533390046
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,1,128,1,float16,fp8,63,0.008555199950933456
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,1,128,1,float16,float16,127,0.01671359986066818
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,1,128,1,float16,fp8,127,0.008432000130414962
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,1,128,1,float16,float16,255,0.017560000717639922
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,1,128,1,float16,fp8,255,0.00854720026254654
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,1,128,1,float16,float16,511,0.018564799427986146
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,1,128,1,float16,fp8,511,0.008432000130414962
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,1,128,1,float16,float16,1023,0.01868479996919632
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,1,128,1,float16,fp8,1023,0.008854400366544724
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,1,128,1,float16,float16,2047,0.020777599513530733
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,1,128,1,float16,fp8,2047,0.010520000010728836
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,1,128,1,float16,float16,4095,0.04110879898071289
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,1,128,1,float16,fp8,4095,0.014475199580192565
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,1,128,1,float16,float16,8191,0.05928320288658142
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,1,128,1,float16,fp8,32767,0.08295040130615235
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,1,128,1,float16,fp8,8191,0.029054400324821473
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,1,128,1,float16,float16,16383,0.09473279714584351
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,1,128,1,float16,fp8,16383,0.04645920097827912
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,1,128,1,float16,float16,32767,0.16665120124816896
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,2,128,1,float16,float16,1,0.016654400527477263
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,2,128,1,float16,fp8,1,0.008479999750852585
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,2,128,1,float16,float16,3,0.016604800522327424
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,1,128,1,float16,float16,65535,0.3107151985168457
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,1,128,1,float16,fp8,65535,0.15457279682159425
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,2,128,1,float16,fp8,3,0.00833280012011528
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,2,128,1,float16,float16,7,0.016760000586509706
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,2,128,1,float16,fp8,63,0.00835999995470047
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,2,128,1,float16,fp8,7,0.008363199979066848
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,2,128,1,float16,float16,15,0.016572800278663636
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,2,128,1,float16,fp8,15,0.008353599905967712
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,2,128,1,float16,float16,31,0.016740800440311433
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,2,128,1,float16,fp8,31,0.008350399881601333
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,2,128,1,float16,float16,63,0.016564799845218657
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,2,128,1,float16,float16,1023,0.0206496000289917
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,2,128,1,float16,float16,127,0.016774399578571318
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,2,128,1,float16,fp8,1023,0.01043360009789467
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,2,128,1,float16,float16,2047,0.04037919938564301
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,2,128,1,float16,fp8,127,0.008459199965000153
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,2,128,1,float16,float16,255,0.016553600132465363
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,2,128,1,float16,fp8,255,0.008371199667453765
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,2,128,1,float16,float16,511,0.018718400597572328
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,2,128,1,float16,fp8,511,0.008432000130414962
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,2,128,1,float16,fp8,2047,0.01271200031042099
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,2,128,1,float16,float16,4095,0.058822399377822875
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,2,128,1,float16,fp8,4095,0.02756800055503845
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,2,128,1,float16,float16,8191,0.09483039975166321
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,2,128,1,float16,fp8,8191,0.04565280079841614
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,2,128,1,float16,float16,16383,0.16597919464111327
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,2,128,1,float16,fp8,16383,0.08251839876174927
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,2,128,1,float16,float16,32767,0.30975520610809326
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,2,128,1,float16,fp8,32767,0.15500799417495728
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,1,128,1,float16,float16,1,0.012624000012874604
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,1,128,1,float16,float16,7,0.014697599411010741
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,1,128,1,float16,fp8,1,0.008444800227880477
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,1,128,1,float16,float16,3,0.012671999633312225
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,1,128,1,float16,fp8,3,0.008459199965000153
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,2,128,1,float16,fp8,65535,0.30399360656738283
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,1,128,1,float16,fp8,7,0.008436799794435502
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,4,2,128,1,float16,float16,65535,0.5945807933807373
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,1,128,1,float16,float16,15,0.012582400441169738
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,1,128,1,float16,fp8,15,0.008390399813652038
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,1,128,1,float16,float16,31,0.01459839940071106
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,1,128,1,float16,fp8,255,0.008427199721336365
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,1,128,1,float16,fp8,31,0.008390399813652038
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,1,128,1,float16,float16,63,0.014590400457382201
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,1,128,1,float16,fp8,63,0.008350399881601333
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,1,128,1,float16,float16,127,0.014534400403499603
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,1,128,1,float16,float16,255,0.013851200044155122
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,1,128,1,float16,fp8,127,0.008395200222730636
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,1,128,1,float16,float16,511,0.014510400593280792
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,1,128,1,float16,fp8,511,0.00835999995470047
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,1,128,1,float16,float16,1023,0.014505599439144135
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,1,128,1,float16,fp8,1023,0.00846880003809929
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,1,128,1,float16,float16,2047,0.014678399264812469
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,1,128,1,float16,fp8,2047,0.008886399865150451
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,1,128,1,float16,float16,4095,0.016620799899101257
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,1,128,1,float16,fp8,4095,0.010558400303125381
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,1,128,1,float16,float16,8191,0.01883520036935806
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,1,128,1,float16,fp8,8191,0.012462399899959564
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,1,128,1,float16,float16,16383,0.022763200104236603
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,1,128,1,float16,fp8,16383,0.014707200229167938
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,1,128,1,float16,float16,32767,0.028958401083946227
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,1,128,1,float16,fp8,32767,0.02382880002260208
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,1,128,1,float16,float16,65535,0.03150239884853363
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,1,128,1,float16,fp8,65535,0.024852800369262695
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,2,128,1,float16,float16,1,0.014591999351978302
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,2,128,1,float16,fp8,1,0.008816000074148178
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,2,128,1,float16,float16,3,0.014595200121402741
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,2,128,1,float16,fp8,3,0.008366400003433227
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,2,128,1,float16,float16,7,0.01454080045223236
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,2,128,1,float16,fp8,7,0.008580800145864487
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,2,128,1,float16,float16,15,0.016527999937534333
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,2,128,1,float16,fp8,15,0.008379200100898742
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,2,128,1,float16,float16,31,0.015531200170516967
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,2,128,1,float16,fp8,31,0.008366400003433227
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,2,128,1,float16,float16,63,0.014561599493026734
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,2,128,1,float16,fp8,63,0.008376000076532364
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,2,128,1,float16,float16,127,0.014532800018787383
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,2,128,1,float16,fp8,127,0.00857120007276535
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,2,128,1,float16,float16,255,0.016553600132465363
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,2,128,1,float16,fp8,255,0.008399999886751174
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,2,128,1,float16,float16,511,0.016531200706958772
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,2,128,1,float16,fp8,511,0.009296000003814697
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,2,128,1,float16,float16,1023,0.014572800695896148
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,2,128,1,float16,fp8,1023,0.00846719965338707
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,2,128,1,float16,float16,16383,0.022860799729824067
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,2,128,1,float16,float16,2047,0.01666560024023056
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,2,128,1,float16,fp8,2047,0.009969600290060044
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,2,128,1,float16,float16,4095,0.01788160055875778
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,2,128,1,float16,fp8,4095,0.01050880029797554
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,2,128,1,float16,fp8,65535,0.020652799308300017
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,2,128,1,float16,float16,8191,0.020006400346755982
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,2,128,1,float16,fp8,8191,0.012668800354003907
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,2,128,1,float16,fp8,16383,0.016689600050449373
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,2,128,1,float16,float16,32767,0.025519999861717223
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,2,128,1,float16,fp8,32767,0.018785600364208222
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,4,2,128,1,float16,float16,65535,0.02898240089416504
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,1,128,1,float16,float16,1,0.014531199634075165
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,1,128,1,float16,fp8,1,0.008377599716186523
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,1,128,1,float16,float16,3,0.01658080071210861
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,1,128,1,float16,fp8,3,0.008528000116348267
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,1,128,1,float16,float16,7,0.014484800398349762
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,1,128,1,float16,fp8,7,0.008430399745702744
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,1,128,1,float16,float16,127,0.014502400159835815
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,1,128,1,float16,float16,15,0.014567999541759491
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,1,128,1,float16,fp8,15,0.008352000266313553
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,1,128,1,float16,float16,31,0.014523200690746307
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,1,128,1,float16,fp8,31,0.008348800241947174
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,1,128,1,float16,float16,63,0.016540800034999848
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,1,128,1,float16,float16,1023,0.016531200706958772
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,1,128,1,float16,fp8,63,0.00854559987783432
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,1,128,1,float16,float16,2047,0.01664479970932007
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,1,128,1,float16,fp8,127,0.008455999940633774
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,1,128,1,float16,fp8,2047,0.009480000287294389
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,1,128,1,float16,float16,255,0.01456640064716339
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,1,128,1,float16,fp8,255,0.00838399976491928
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,1,128,1,float16,float16,511,0.016652800142765045
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,1,128,1,float16,fp8,511,0.008350399881601333
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,1,128,1,float16,fp8,1023,0.008379200100898742
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,1,128,1,float16,float16,4095,0.017107200622558594
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,1,128,1,float16,fp8,4095,0.010513599961996078
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,1,128,1,float16,float16,8191,0.020615999400615693
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,1,128,1,float16,fp8,8191,0.012425599992275238
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,1,128,1,float16,float16,16383,0.02280000001192093
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,1,128,1,float16,fp8,16383,0.016697600483894348
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,1,128,1,float16,float16,32767,0.02629440128803253
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,1,128,1,float16,fp8,32767,0.018665599822998046
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,1,128,1,float16,float16,65535,0.029172798991203307
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,1,128,1,float16,fp8,65535,0.020427200198173522
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,2,128,1,float16,float16,1,0.0186256006360054
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,2,128,1,float16,fp8,1,0.008377599716186523
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,2,128,1,float16,float16,3,0.016680000722408293
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,2,128,1,float16,fp8,3,0.008423999696969987
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,2,128,1,float16,float16,7,0.017073599994182585
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,2,128,1,float16,fp8,7,0.0083856001496315
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,2,128,1,float16,float16,15,0.018764799833297728
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,2,128,1,float16,fp8,15,0.00841279998421669
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,2,128,1,float16,float16,31,0.018587200343608855
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,2,128,1,float16,fp8,31,0.00840959995985031
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,2,128,1,float16,float16,63,0.018593600392341612
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,2,128,1,float16,fp8,63,0.008478400111198426
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,2,128,1,float16,float16,127,0.017529599368572235
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,2,128,1,float16,fp8,1023,0.008376000076532364
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,2,128,1,float16,fp8,127,0.008463999629020691
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,2,128,1,float16,float16,255,0.01860159933567047
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,2,128,1,float16,fp8,255,0.008455999940633774
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,2,128,1,float16,float16,511,0.01860000044107437
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,2,128,1,float16,fp8,511,0.008448000252246856
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,2,128,1,float16,float16,1023,0.018636800348758698
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,2,128,1,float16,float16,2047,0.01868959963321686
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,2,128,1,float16,fp8,2047,0.008878400176763534
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,2,128,1,float16,float16,4095,0.020735999941825865
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,2,128,1,float16,fp8,4095,0.010497599840164185
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,2,128,1,float16,float16,8191,0.022881600260734557
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,2,128,1,float16,fp8,8191,0.012452799826860428
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,2,128,1,float16,float16,16383,0.024736000597476958
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,2,128,1,float16,fp8,16383,0.014496000111103058
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,2,128,1,float16,float16,32767,0.027000001072883605
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,2,128,1,float16,fp8,32767,0.01664319932460785
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,2,128,1,float16,float16,65535,0.04938240051269531
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,1,128,1,float16,float16,7,0.016627199947834015
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,4,2,128,1,float16,fp8,65535,0.018488000333309173
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,1,128,1,float16,float16,15,0.016633599996566772
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,1,128,1,float16,float16,1,0.016582399606704712
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,1,128,1,float16,float16,31,0.016550399363040924
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,1,128,1,float16,fp8,1,0.008347199857234954
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,1,128,1,float16,float16,3,0.016710400581359863
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,1,128,1,float16,fp8,3,0.00835679993033409
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,1,128,1,float16,float16,127,0.01653439998626709
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,1,128,1,float16,fp8,7,0.008414400368928909
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,1,128,1,float16,fp8,15,0.008353599905967712
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,1,128,1,float16,fp8,31,0.008428800106048583
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,1,128,1,float16,float16,63,0.016737599670886994
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,1,128,1,float16,fp8,63,0.008342400193214417
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,1,128,1,float16,fp8,127,0.008363199979066848
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,1,128,1,float16,float16,255,0.016638399660587312
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,1,128,1,float16,fp8,255,0.008371199667453765
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,1,128,1,float16,float16,511,0.016548800468444824
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,1,128,1,float16,fp8,511,0.008376000076532364
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,1,128,1,float16,float16,1023,0.017425599694252013
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,1,128,1,float16,fp8,1023,0.00843840017914772
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,1,128,1,float16,float16,2047,0.018593600392341612
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,1,128,1,float16,fp8,2047,0.00894080027937889
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,1,128,1,float16,float16,4095,0.020615999400615693
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,1,128,1,float16,fp8,4095,0.010681600123643876
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,1,128,1,float16,fp8,32767,0.04713920056819916
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,1,128,1,float16,float16,8191,0.04027200043201447
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,2,128,1,float16,float16,1,0.018172800540924072
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,1,128,1,float16,fp8,8191,0.014094400405883788
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,1,128,1,float16,float16,16383,0.05776159763336182
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,1,128,1,float16,fp8,16383,0.0289247989654541
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,1,128,1,float16,float16,32767,0.09464319944381713
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,1,128,1,float16,float16,65535,0.16692960262298584
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,1,128,1,float16,fp8,65535,0.08479359745979309
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,2,128,1,float16,fp8,1,0.008556800335645676
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,2,128,1,float16,float16,3,0.016689600050449373
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,2,128,1,float16,fp8,3,0.008350399881601333
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,2,128,1,float16,float16,7,0.017345599830150604
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,2,128,1,float16,fp8,63,0.008460800349712371
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,2,128,1,float16,fp8,7,0.00833439975976944
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,2,128,1,float16,float16,15,0.01655679941177368
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,2,128,1,float16,fp8,15,0.008441600203514098
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,2,128,1,float16,float16,31,0.01677920073270798
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,2,128,1,float16,fp8,31,0.008350399881601333
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,2,128,1,float16,float16,63,0.017134399712085725
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,2,128,1,float16,float16,127,0.018566399812698364
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,2,128,1,float16,fp8,127,0.00835679993033409
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,2,128,1,float16,float16,255,0.017369599640369417
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,2,128,1,float16,fp8,255,0.008355200290679932
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,2,128,1,float16,float16,511,0.018542400002479552
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,2,128,1,float16,fp8,511,0.008406399935483932
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,2,128,1,float16,float16,1023,0.01871200054883957
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,2,128,1,float16,fp8,1023,0.00998080000281334
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,2,128,1,float16,float16,2047,0.020815999805927278
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,2,128,1,float16,fp8,2047,0.010521599650382995
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,2,128,1,float16,float16,4095,0.041176000237464906
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,2,128,1,float16,fp8,4095,0.012542399764060973
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,2,128,1,float16,float16,8191,0.058190399408340455
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,2,128,1,float16,fp8,8191,0.028995200991630554
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,2,128,1,float16,float16,16383,0.0945360004901886
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,2,128,1,float16,fp8,16383,0.047111999988555905
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,2,128,1,float16,float16,65535,0.3111920118331909
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,2,128,1,float16,float16,32767,0.16681920289993285
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,2,128,1,float16,fp8,32767,0.08306080102920532
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,1,128,1,float16,float16,7,0.018603199720382692
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,1,128,1,float16,float16,1,0.016672000288963318
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,1,128,1,float16,fp8,1,0.008457600325345992
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,4,2,128,1,float16,fp8,65535,0.15557760000228882
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,1,128,1,float16,float16,3,0.018561600148677825
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,1,128,1,float16,fp8,3,0.008399999886751174
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,1,128,1,float16,fp8,7,0.00843840017914772
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,1,128,1,float16,fp8,15,0.00854720026254654
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,1,128,1,float16,float16,15,0.018401600420475006
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,1,128,1,float16,float16,31,0.016672000288963318
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,1,128,1,float16,fp8,255,0.008363199979066848
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,1,128,1,float16,fp8,31,0.008414400368928909
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,1,128,1,float16,float16,63,0.01674399971961975
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,1,128,1,float16,fp8,63,0.008366400003433227
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,1,128,1,float16,fp8,1023,0.008444800227880477
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,1,128,1,float16,float16,127,0.01860000044107437
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,1,128,1,float16,fp8,127,0.008523199707269669
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,1,128,1,float16,float16,255,0.018774400651454925
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,1,128,1,float16,float16,511,0.01714400053024292
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,1,128,1,float16,fp8,511,0.008553600311279297
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,1,128,1,float16,float16,1023,0.01733119934797287
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,1,128,1,float16,float16,2047,0.018617600202560425
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,1,128,1,float16,fp8,2047,0.008662399649620057
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,1,128,1,float16,float16,4095,0.018700799345970152
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,1,128,1,float16,fp8,4095,0.010548800230026245
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,1,128,1,float16,float16,8191,0.02287680059671402
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,1,128,1,float16,fp8,8191,0.012467200309038163
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,1,128,1,float16,float16,16383,0.024748800694942473
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,1,128,1,float16,fp8,16383,0.014641599357128143
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,1,128,1,float16,float16,32767,0.0262719988822937
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,1,128,1,float16,fp8,32767,0.01653279960155487
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,1,128,1,float16,float16,65535,0.04716480076313019
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,1,128,1,float16,fp8,65535,0.01868640035390854
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,2,128,1,float16,float16,1,0.01658080071210861
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,2,128,1,float16,fp8,1,0.008638399839401244
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,2,128,1,float16,float16,3,0.01855359971523285
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,2,128,1,float16,fp8,3,0.008377599716186523
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,2,128,1,float16,float16,7,0.018691200017929076
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,2,128,1,float16,fp8,7,0.00846719965338707
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,2,128,1,float16,float16,15,0.018699200451374055
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,2,128,1,float16,fp8,15,0.008551999926567078
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,2,128,1,float16,float16,31,0.016812799870967864
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,2,128,1,float16,fp8,31,0.008568000048398972
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,2,128,1,float16,float16,63,0.01687680035829544
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,2,128,1,float16,fp8,63,0.00835999995470047
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,2,128,1,float16,float16,127,0.018606400489807128
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,2,128,1,float16,fp8,127,0.008537600189447403
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,2,128,1,float16,float16,255,0.017155200242996216
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,2,128,1,float16,fp8,2047,0.010375999659299851
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,2,128,1,float16,fp8,255,0.00846880003809929
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,2,128,1,float16,float16,511,0.018675200641155243
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,2,128,1,float16,fp8,511,0.008398400247097015
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,2,128,1,float16,float16,1023,0.017015999555587767
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,2,128,1,float16,fp8,8191,0.01247519999742508
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,2,128,1,float16,fp8,1023,0.008470399677753449
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,2,128,1,float16,float16,2047,0.01866399943828583
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,2,128,1,float16,float16,4095,0.020628799498081208
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,2,128,1,float16,fp8,4095,0.01048479974269867
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,2,128,1,float16,float16,8191,0.022169600427150726
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,2,128,1,float16,float16,16383,0.02475520074367523
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,2,128,1,float16,fp8,16383,0.013315199315547943
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,2,128,1,float16,float16,32767,0.04530400037765503
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,2,128,1,float16,fp8,32767,0.016577599942684172
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,2,128,1,float16,float16,65535,0.06406880021095276
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,4,2,128,1,float16,fp8,65535,0.03245759904384613
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,1,128,1,float16,float16,15,0.018580800294876097
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,1,128,1,float16,float16,1,0.016568000614643096
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,1,128,1,float16,fp8,1,0.00835999995470047
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,1,128,1,float16,float16,3,0.01656640022993088
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,1,128,1,float16,fp8,3,0.008339200168848038
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,1,128,1,float16,float16,7,0.017260800302028655
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,1,128,1,float16,fp8,7,0.008364800363779068
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,1,128,1,float16,fp8,15,0.008355200290679932
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,1,128,1,float16,float16,31,0.018489600718021394
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,1,128,1,float16,fp8,31,0.00835999995470047
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,1,128,1,float16,float16,63,0.016715200245380403
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,1,128,1,float16,fp8,63,0.008377599716186523
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,1,128,1,float16,float16,1023,0.020812800526618956
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,1,128,1,float16,float16,127,0.0166143998503685
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,1,128,1,float16,fp8,127,0.008352000266313553
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,1,128,1,float16,float16,255,0.01759680062532425
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,1,128,1,float16,fp8,255,0.008326400071382523
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,1,128,1,float16,float16,511,0.018700799345970152
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,1,128,1,float16,fp8,511,0.008419200032949447
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,1,128,1,float16,float16,8191,0.0944271981716156
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,1,128,1,float16,fp8,1023,0.010393600165843963
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,1,128,1,float16,float16,2047,0.041131201386451724
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,1,128,1,float16,fp8,2047,0.012745599448680877
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,1,128,1,float16,float16,16383,0.1671231985092163
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,1,128,1,float16,float16,4095,0.05872480273246765
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,1,128,1,float16,fp8,4095,0.028372800350189208
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,1,128,1,float16,fp8,8191,0.04601759910583496
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,1,128,1,float16,fp8,16383,0.08218399882316589
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,2,128,1,float16,float16,1,0.018726399540901183
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,2,128,1,float16,fp8,1,0.008640000224113464
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,1,128,1,float16,float16,32767,0.3096544027328491
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,1,128,1,float16,fp8,32767,0.15463039875030518
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,2,128,1,float16,float16,3,0.01870400011539459
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,2,128,1,float16,fp8,3,0.008563199639320373
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,2,128,1,float16,float16,7,0.01860000044107437
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,2,128,1,float16,fp8,7,0.008574400097131729
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,2,128,1,float16,float16,15,0.018716800212860107
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,2,128,1,float16,fp8,15,0.008364800363779068
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,2,128,1,float16,float16,31,0.01860480010509491
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,2,128,1,float16,fp8,31,0.00873439982533455
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,2,128,1,float16,float16,63,0.018688000738620758
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,2,128,1,float16,fp8,63,0.008497600257396699
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,2,128,1,float16,float16,127,0.018753600120544434
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,2,128,1,float16,fp8,127,0.009055999666452407
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,2,128,1,float16,float16,255,0.018760000169277192
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,2,128,1,float16,fp8,255,0.00851999968290329
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,2,128,1,float16,float16,511,0.020657600462436677
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,2,128,1,float16,fp8,511,0.01043199971318245
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,2,128,1,float16,float16,1023,0.041238400340080264
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,2,128,1,float16,fp8,1023,0.012600000202655792
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,2,128,1,float16,float16,2047,0.05969439744949341
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,2,128,1,float16,fp8,2047,0.028891199827194215
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,2,128,1,float16,float16,4095,0.09604960083961486
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,2,128,1,float16,float16,16383,0.31232481002807616
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,2,128,1,float16,fp8,4095,0.04721280038356781
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,2,128,1,float16,float16,8191,0.16719199419021608
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,1,128,1,float16,float16,3,0.018691200017929076
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,2,128,1,float16,fp8,8191,0.08506240248680115
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,2,128,1,float16,fp8,16383,0.16085280179977418
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,1,128,1,float16,float16,1,0.018702399730682374
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,1,128,1,float16,fp8,1,0.008553600311279297
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,1,128,1,float16,fp8,3,0.008419200032949447
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,2,128,1,float16,float16,32767,0.6023551940917968
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,4,2,128,1,float16,fp8,32767,0.31145761013031004
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,1,128,1,float16,float16,7,0.01876640021800995
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,1,128,1,float16,fp8,7,0.008491200208663941
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,1,128,1,float16,float16,15,0.01870719939470291
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,1,128,1,float16,fp8,15,0.008689600229263305
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,1,128,1,float16,float16,31,0.019801600277423857
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,1,128,1,float16,fp8,31,0.008779200166463852
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,1,128,1,float16,float16,63,0.02022079974412918
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,1,128,1,float16,fp8,63,0.008420799672603608
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,1,128,1,float16,float16,127,0.020395199954509734
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,1,128,1,float16,fp8,127,0.0086496002972126
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,1,128,1,float16,float16,255,0.02022400051355362
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,1,128,1,float16,fp8,255,0.008452799916267396
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,1,128,1,float16,float16,511,0.02258560061454773
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,1,128,1,float16,fp8,511,0.010497599840164185
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,1,128,1,float16,float16,4095,0.09679520130157471
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,1,128,1,float16,float16,1023,0.04126079976558685
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,1,128,1,float16,fp8,1023,0.01422400027513504
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,1,128,1,float16,float16,2047,0.05976960062980652
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,1,128,1,float16,fp8,2047,0.028403198719024657
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,1,128,1,float16,fp8,4095,0.04776960015296936
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,2,128,1,float16,float16,1,0.02261279970407486
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,1,128,1,float16,fp8,8191,0.08400319814682007
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,1,128,1,float16,float16,8191,0.16959680318832399
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,2,128,1,float16,fp8,1,0.010494399815797806
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,2,128,1,float16,float16,3,0.022580799460411072
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,2,128,1,float16,fp8,3,0.01061599999666214
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,2,128,1,float16,float16,7,0.02266079932451248
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,2,128,1,float16,fp8,7,0.010790400207042694
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,2,128,1,float16,float16,15,0.022753599286079406
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,2,128,1,float16,fp8,15,0.010676799714565277
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,2,128,1,float16,float16,31,0.02266079932451248
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,2,128,1,float16,fp8,31,0.011479999870061874
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,2,128,1,float16,float16,63,0.022767999768257143
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,2,128,1,float16,fp8,63,0.010728000104427338
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,2,128,1,float16,float16,127,0.022703999280929567
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,2,128,1,float16,fp8,127,0.010908800363540649
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,2,128,1,float16,float16,255,0.0227743998169899
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,2,128,1,float16,fp8,255,0.010529600083827972
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,2,128,1,float16,float16,511,0.04140479862689972
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,2,128,1,float16,fp8,511,0.014699199795722961
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,2,128,1,float16,float16,1023,0.06161119937896729
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,2,128,1,float16,fp8,1023,0.028799998760223388
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,2,128,1,float16,float16,2047,0.09817439913749695
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,2,128,1,float16,fp8,2047,0.04747680127620697
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,2,128,1,float16,float16,4095,0.17065759897232055
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,2,128,1,float16,fp8,4095,0.08440160155296325
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,1,128,1,float16,float16,1,0.022830399870872497
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,1,128,1,float16,fp8,1,0.01244639977812767
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,2,128,1,float16,float16,8191,0.31668639183044434
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,4,2,128,1,float16,fp8,8191,0.15820000171661378
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,1,128,1,float16,float16,3,0.022728000581264497
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,1,128,1,float16,fp8,3,0.010822399705648422
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,1,128,1,float16,float16,7,0.022767999768257143
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,1,128,1,float16,fp8,7,0.012462399899959564
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,1,128,1,float16,float16,15,0.02337760031223297
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,1,128,1,float16,fp8,15,0.012484800070524216
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,1,128,1,float16,fp8,31,0.012425599992275238
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,1,128,1,float16,float16,63,0.02281759977340698
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,1,128,1,float16,fp8,63,0.010486400127410889
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,1,128,1,float16,float16,31,0.023423999547958374
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,1,128,1,float16,float16,127,0.022758400440216063
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,1,128,1,float16,fp8,127,0.012467200309038163
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,1,128,1,float16,float16,255,0.0228752002120018
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,1,128,1,float16,fp8,255,0.012459199875593185
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,1,128,1,float16,float16,511,0.04506720006465912
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,1,128,1,float16,fp8,511,0.014555199444293976
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,1,128,1,float16,float16,1023,0.06259999871253967
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,1,128,1,float16,float16,2047,0.10005439519882202
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,1,128,1,float16,fp8,1023,0.029236799478530882
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,1,128,1,float16,fp8,2047,0.0478767991065979
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,1,128,1,float16,float16,4095,0.17233760356903077
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,2,128,1,float16,fp8,7,0.014619199931621552
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,2,128,1,float16,float16,1,0.028035199642181395
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,1,128,1,float16,fp8,4095,0.08377599716186523
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,2,128,1,float16,fp8,1,0.015095999836921692
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,2,128,1,float16,float16,3,0.028932800889015196
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,2,128,1,float16,fp8,3,0.01451839953660965
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,2,128,1,float16,float16,7,0.028993600606918336
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,2,128,1,float16,float16,15,0.02882080078125
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,2,128,1,float16,fp8,15,0.014545600116252898
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,2,128,1,float16,float16,31,0.028828799724578857
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,2,128,1,float16,fp8,31,0.01496479958295822
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,2,128,1,float16,float16,63,0.028803199529647827
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,2,128,1,float16,fp8,63,0.014684799313545226
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,2,128,1,float16,float16,127,0.028857600688934327
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,2,128,1,float16,fp8,127,0.014620800316333771
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,2,128,1,float16,float16,255,0.04339039921760559
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,2,128,1,float16,fp8,255,0.01454080045223236
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,2,128,1,float16,float16,511,0.06171839833259583
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,2,128,1,float16,fp8,511,0.029016000032424927
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,2,128,1,float16,float16,1023,0.09881600141525268
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,2,128,1,float16,fp8,1023,0.04783360064029694
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,2,128,1,float16,float16,2047,0.17092640399932862
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,2,128,1,float16,fp8,2047,0.08561279773712158
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,1,128,1,float16,float16,1,0.01700959950685501
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,1,128,1,float16,fp8,1,0.008446399867534638
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,2,128,1,float16,fp8,4095,0.16042239665985109
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,4,2,128,1,float16,float16,4095,0.31411840915679934
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,1,128,1,float16,float16,3,0.018580800294876097
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,1,128,1,float16,fp8,3,0.008472000062465668
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,1,128,1,float16,float16,7,0.018592000007629395
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,1,128,1,float16,fp8,7,0.008404800295829773
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,1,128,1,float16,fp8,63,0.008353599905967712
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,1,128,1,float16,float16,15,0.01653279960155487
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,1,128,1,float16,fp8,15,0.008451200276613235
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,1,128,1,float16,float16,31,0.018515199422836304
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,1,128,1,float16,fp8,31,0.008515200018882752
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,1,128,1,float16,float16,63,0.01860000044107437
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,1,128,1,float16,fp8,127,0.008432000130414962
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,1,128,1,float16,float16,127,0.016638399660587312
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,1,128,1,float16,fp8,1023,0.008550400286912918
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,1,128,1,float16,float16,255,0.0165583997964859
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,1,128,1,float16,fp8,255,0.008472000062465668
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,1,128,1,float16,float16,511,0.01865279972553253
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,1,128,1,float16,fp8,511,0.008665599673986436
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,1,128,1,float16,float16,1023,0.0187376007437706
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,1,128,1,float16,fp8,8191,0.012520000338554382
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,1,128,1,float16,float16,2047,0.01863519996404648
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,1,128,1,float16,fp8,16383,0.013463999330997466
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,1,128,1,float16,fp8,2047,0.010391999781131745
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,1,128,1,float16,float16,4095,0.01871519982814789
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,1,128,1,float16,fp8,4095,0.0104032002389431
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,1,128,1,float16,float16,8191,0.022779199481010436
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,1,128,1,float16,float16,16383,0.024835200607776643
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,1,128,1,float16,float16,32767,0.04546720087528229
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,1,128,1,float16,fp8,32767,0.016519999504089354
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,1,128,1,float16,float16,65535,0.06385279893875122
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,1,128,1,float16,fp8,65535,0.03246079981327057
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,2,128,1,float16,float16,1,0.01713919937610626
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,2,128,1,float16,fp8,1,0.008804800361394883
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,2,128,1,float16,float16,3,0.01865279972553253
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,2,128,1,float16,fp8,3,0.008473599702119828
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,2,128,1,float16,float16,7,0.017547200620174407
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,2,128,1,float16,fp8,7,0.008430399745702744
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,2,128,1,float16,float16,15,0.016649599373340606
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,2,128,1,float16,fp8,15,0.008457600325345992
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,2,128,1,float16,float16,31,0.018611200153827667
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,2,128,1,float16,fp8,31,0.00836160033941269
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,2,128,1,float16,float16,63,0.016731199622154237
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,2,128,1,float16,fp8,63,0.008449599891901017
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,2,128,1,float16,float16,127,0.018692800402641298
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,2,128,1,float16,fp8,127,0.0083856001496315
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,2,128,1,float16,fp8,255,0.008483199775218964
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,2,128,1,float16,fp8,2047,0.010380800068378448
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,2,128,1,float16,float16,255,0.018665599822998046
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,2,128,1,float16,float16,511,0.01860959976911545
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,2,128,1,float16,fp8,511,0.008422400057315826
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,2,128,1,float16,float16,1023,0.016832000017166136
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,2,128,1,float16,fp8,1023,0.00840959995985031
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,2,128,1,float16,float16,2047,0.019324800372123717
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,2,128,1,float16,float16,4095,0.020718400180339814
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,2,128,1,float16,fp8,4095,0.010815999656915664
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,2,128,1,float16,float16,8191,0.022731199860572815
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,2,128,1,float16,fp8,8191,0.012494400143623352
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,2,128,1,float16,float16,16383,0.04387840032577515
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,2,128,1,float16,fp8,16383,0.014632000029087067
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,2,128,1,float16,float16,32767,0.06236799955368042
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,2,128,1,float16,fp8,32767,0.030880001187324525
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,2,128,1,float16,float16,65535,0.09850720167160035
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,4,2,128,1,float16,fp8,65535,0.049404799938201904
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,4,1,128,1,float16,float16,1,0.027214398980140685
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,4,1,128,1,float16,fp8,1,0.015537600219249725
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,4,1,128,1,float16,float16,3,0.0288239985704422
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,4,1,128,1,float16,fp8,3,0.015603199601173401
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,4,1,128,1,float16,float16,7,0.028814399242401124
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,4,1,128,1,float16,fp8,7,0.01656160056591034
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,4,1,128,1,float16,float16,15,0.028782400488853454
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,4,1,128,1,float16,fp8,15,0.01655679941177368
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,4,1,128,1,float16,float16,31,0.02889760136604309
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,4,1,128,1,float16,fp8,31,0.015963199734687804
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,4,1,128,1,float16,float16,63,0.02881760001182556
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,4,1,128,1,float16,fp8,63,0.01611520051956177
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,4,1,128,1,float16,float16,127,0.029156801104545594
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,4,1,128,1,float16,float16,1023,0.0995199978351593
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,4,1,128,1,float16,fp8,127,0.016527999937534333
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,4,1,128,1,float16,float16,255,0.043808001279830935
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,4,1,128,1,float16,fp8,255,0.016553600132465363
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,4,2,128,1,float16,fp8,1,0.024724799394607543
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,4,1,128,1,float16,float16,511,0.06351360082626342
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,4,2,128,1,float16,fp8,3,0.024828800559043886
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,4,1,128,1,float16,fp8,511,0.028841599822044373
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,4,1,128,1,float16,fp8,1023,0.04869439899921417
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,4,1,128,1,float16,float16,2047,0.17135839462280272
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,4,2,128,1,float16,float16,1,0.03812800049781799
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,4,1,128,1,float16,fp8,2047,0.08600159883499145
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,4,2,128,1,float16,float16,3,0.03744640052318573
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,4,2,128,1,float16,fp8,7,0.024928000569343568
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,4,2,128,1,float16,float16,7,0.03781920075416565
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,4,2,128,1,float16,float16,15,0.03914560079574585
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,4,2,128,1,float16,fp8,15,0.024982400238513947
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,4,2,128,1,float16,float16,31,0.03768480122089386
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,4,2,128,1,float16,fp8,31,0.024135999381542206
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,4,2,128,1,float16,float16,63,0.039062398672103885
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,4,2,128,1,float16,fp8,63,0.02487040013074875
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,4,2,128,1,float16,float16,127,0.04660800099372864
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,4,2,128,1,float16,fp8,127,0.023057599365711213
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,4,2,128,1,float16,float16,255,0.06361119747161866
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,4,2,128,1,float16,fp8,255,0.030697599053382874
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,4,2,128,1,float16,float16,511,0.09946720004081726
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,4,2,128,1,float16,fp8,511,0.04819200038909912
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,4,2,128,1,float16,float16,1023,0.1712175965309143
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,4,2,128,1,float16,fp8,1023,0.08517439961433411
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,4,1,128,1,float16,float16,1,0.03731200098991394
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,4,1,128,1,float16,fp8,1,0.024879999458789825
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,4,2,128,1,float16,float16,2047,0.3150576114654541
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,4,1,128,1,float16,float16,3,0.037191998958587644
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,4,2,128,1,float16,fp8,2047,0.15885759592056276
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,4,1,128,1,float16,fp8,3,0.024828800559043886
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,4,1,128,1,float16,float16,7,0.037118399143218996
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,4,1,128,1,float16,float16,63,0.039201599359512326
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,4,1,128,1,float16,fp8,7,0.024830399453639983
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,4,1,128,1,float16,float16,15,0.03862560093402863
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,4,1,128,1,float16,fp8,15,0.02476480007171631
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,4,1,128,1,float16,float16,31,0.0378847986459732
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,4,1,128,1,float16,fp8,31,0.024873599410057068
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,4,1,128,1,float16,fp8,63,0.024726399779319765
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,4,1,128,1,float16,float16,127,0.04671840071678161
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,4,1,128,1,float16,fp8,127,0.02483679950237274
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,4,1,128,1,float16,float16,255,0.06333919763565063
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,4,1,128,1,float16,float16,1023,0.17248480319976806
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,4,1,128,1,float16,fp8,255,0.03107360005378723
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,4,2,128,1,float16,fp8,1,0.0412304013967514
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,4,1,128,1,float16,float16,511,0.10033760070800782
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,4,1,128,1,float16,fp8,511,0.049374398589134214
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,4,2,128,1,float16,float16,1,0.05762879848480225
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,4,1,128,1,float16,fp8,1023,0.08645600080490112
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,4,2,128,1,float16,float16,3,0.05762079954147339
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,4,2,128,1,float16,fp8,3,0.041140800714492796
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,4,2,128,1,float16,float16,7,0.05762400031089783
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,4,2,128,1,float16,fp8,7,0.041212800145149234
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,4,2,128,1,float16,float16,15,0.057574397325515746
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,4,2,128,1,float16,fp8,15,0.04120000004768372
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,4,2,128,1,float16,float16,31,0.0614031970500946
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,4,2,128,1,float16,fp8,31,0.04112319946289063
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,4,2,128,1,float16,float16,63,0.06385279893875122
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,4,2,128,1,float16,fp8,63,0.041140800714492796
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,4,2,128,1,float16,float16,127,0.06797599792480469
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,4,2,128,1,float16,fp8,127,0.04316799938678741
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,4,2,128,1,float16,float16,255,0.10085920095443726
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,4,2,128,1,float16,fp8,255,0.0516319990158081
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,4,2,128,1,float16,float16,511,0.1721743941307068
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,4,2,128,1,float16,fp8,511,0.08619199991226197
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,1,128,1,float16,float16,1,0.017987200617790224
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,1,128,1,float16,fp8,1,0.008427199721336365
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,4,2,128,1,float16,float16,1023,0.31568160057067873
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,1,128,1,float16,float16,3,0.018617600202560425
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,4,2,128,1,float16,fp8,1023,0.1597632050514221
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,1,128,1,float16,fp8,3,0.008449599891901017
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,1,128,1,float16,float16,7,0.01655520051717758
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,1,128,1,float16,fp8,63,0.008463999629020691
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,1,128,1,float16,fp8,7,0.008363199979066848
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,1,128,1,float16,float16,15,0.016820800304412842
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,1,128,1,float16,fp8,15,0.008425600081682205
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,1,128,1,float16,fp8,255,0.00843520015478134
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,1,128,1,float16,float16,31,0.016947199404239655
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,1,128,1,float16,fp8,511,0.008516799658536911
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,1,128,1,float16,fp8,31,0.008591999858617782
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,1,128,1,float16,float16,63,0.018529599905014037
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,1,128,1,float16,float16,127,0.016575999557971954
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,1,128,1,float16,fp8,127,0.008644799888134002
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,1,128,1,float16,float16,255,0.016760000586509706
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,1,128,1,float16,float16,511,0.018603199720382692
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,1,128,1,float16,float16,1023,0.018713599443435668
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,1,128,1,float16,fp8,1023,0.008507200330495835
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,1,128,1,float16,float16,2047,0.018694399297237395
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,1,128,1,float16,fp8,2047,0.010375999659299851
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,1,128,1,float16,float16,4095,0.02064799964427948
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,1,128,1,float16,fp8,4095,0.011108800023794174
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,1,128,1,float16,float16,8191,0.022697600722312927
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,1,128,1,float16,fp8,8191,0.012535999715328216
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,1,128,1,float16,float16,16383,0.04292320013046265
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,1,128,1,float16,fp8,16383,0.016495999693870545
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,1,128,1,float16,float16,32767,0.06363999843597412
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,1,128,1,float16,fp8,32767,0.030899199843406677
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,1,128,1,float16,float16,65535,0.09829440116882324
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,1,128,1,float16,fp8,65535,0.049553599953651425
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,2,128,1,float16,float16,15,0.016628800332546233
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,2,128,1,float16,float16,1,0.01656000018119812
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,2,128,1,float16,float16,31,0.01656640022993088
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,2,128,1,float16,fp8,1,0.008423999696969987
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,2,128,1,float16,float16,3,0.016683200001716615
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,2,128,1,float16,fp8,3,0.00843840017914772
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,2,128,1,float16,float16,7,0.01672160029411316
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,2,128,1,float16,fp8,7,0.008347199857234954
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,2,128,1,float16,float16,255,0.016625599563121797
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,2,128,1,float16,fp8,15,0.008364800363779068
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,2,128,1,float16,float16,511,0.016631999611854555
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,2,128,1,float16,fp8,31,0.008457600325345992
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,2,128,1,float16,fp8,511,0.008432000130414962
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,2,128,1,float16,float16,63,0.016641600430011748
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,2,128,1,float16,fp8,63,0.008342400193214417
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,2,128,1,float16,float16,127,0.016672000288963318
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,2,128,1,float16,fp8,127,0.008425600081682205
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,2,128,1,float16,fp8,255,0.008353599905967712
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,2,128,1,float16,float16,1023,0.018639999628067016
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,2,128,1,float16,fp8,1023,0.008579199761152267
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,2,128,1,float16,float16,2047,0.01868479996919632
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,2,128,1,float16,fp8,2047,0.010436800122261048
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,2,128,1,float16,float16,16383,0.05916000008583069
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,2,128,1,float16,float16,4095,0.020720000565052032
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,2,128,1,float16,fp8,4095,0.01061279997229576
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,2,128,1,float16,float16,8191,0.041206398606300355
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,2,128,1,float16,fp8,8191,0.014819200336933135
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,2,128,1,float16,fp8,16383,0.029014399647712706
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,2,128,1,float16,float16,32767,0.0946672022342682
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,2,128,1,float16,fp8,32767,0.04725759923458099
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,2,128,1,float16,float16,65535,0.16552480459213256
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,2,1,128,1,float16,float16,1,0.01775359958410263
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,4,2,128,1,float16,fp8,65535,0.08379520177841186
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,2,1,128,1,float16,fp8,1,0.00836160033941269
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,2,1,128,1,float16,float16,3,0.016735999286174773
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,2,1,128,1,float16,fp8,3,0.008427199721336365
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,2,1,128,1,float16,float16,7,0.016967999935150146
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,2,1,128,1,float16,fp8,7,0.008369600027799606
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,2,1,128,1,float16,float16,15,0.016977599263191222
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,2,1,128,1,float16,fp8,15,0.008448000252246856
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,2,1,128,1,float16,float16,31,0.01780160069465637
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,2,1,128,1,float16,fp8,31,0.008463999629020691
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,2,1,128,1,float16,float16,63,0.016606399416923524
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,2,1,128,1,float16,fp8,63,0.008643200248479843
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,2,1,128,1,float16,float16,127,0.018199999630451203
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,2,1,128,1,float16,fp8,127,0.008399999886751174
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,2,1,128,1,float16,float16,255,0.018542400002479552
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,2,1,128,1,float16,fp8,255,0.008382400125265121
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,2,1,128,1,float16,float16,511,0.017023999989032746
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,2,1,128,1,float16,fp8,511,0.008390399813652038
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,2,1,128,1,float16,float16,1023,0.018702399730682374
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,2,1,128,1,float16,fp8,1023,0.010279999673366546
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,2,1,128,1,float16,float16,2047,0.020683200657367708
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,2,1,128,1,float16,fp8,2047,0.010395199805498124
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,2,1,128,1,float16,float16,4095,0.04103200137615204
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,2,1,128,1,float16,fp8,4095,0.012723200023174286
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,2,1,128,1,float16,float16,8191,0.05939679741859436
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,2,1,128,1,float16,fp8,8191,0.028625598549842833
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,2,1,128,1,float16,float16,16383,0.09462080001831055
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,2,1,128,1,float16,fp8,16383,0.047035199403762815
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,2,1,128,1,float16,float16,32767,0.16675039529800414
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,2,1,128,1,float16,fp8,32767,0.08237919807434083
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,2,1,128,1,float16,float16,65535,0.3097951889038086
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,2,1,128,1,float16,fp8,65535,0.15532159805297852
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,2,1,128,1,float16,float16,1,0.012667199969291687
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,2,1,128,1,float16,fp8,1,0.008584000170230865
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,2,1,128,1,float16,float16,3,0.014665600657463074
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,2,1,128,1,float16,fp8,3,0.008428800106048583
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,2,1,128,1,float16,fp8,131071,0.29953598976135254
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,64,1,2,1,128,1,float16,float16,131071,0.5968815803527832
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,2,1,128,1,float16,float16,7,0.01454080045223236
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,2,1,128,1,float16,fp8,7,0.008668799698352814
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,2,1,128,1,float16,float16,15,0.014480000734329224
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,2,1,128,1,float16,fp8,15,0.008462399989366532
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,2,1,128,1,float16,float16,31,0.014577600359916686
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,2,1,128,1,float16,fp8,31,0.00844319984316826
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,2,1,128,1,float16,float16,63,0.013787199556827546
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,2,1,128,1,float16,float16,127,0.014496000111103058
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,2,1,128,1,float16,float16,1023,0.013974399864673614
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,2,1,128,1,float16,fp8,63,0.008404800295829773
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,2,1,128,1,float16,float16,2047,0.014505599439144135
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,2,1,128,1,float16,fp8,127,0.00854720026254654
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,2,1,128,1,float16,float16,255,0.013683199882507324
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,2,1,128,1,float16,fp8,255,0.008428800106048583
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,2,1,128,1,float16,float16,8191,0.016641600430011748
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,2,1,128,1,float16,float16,511,0.015820799767971037
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,2,1,128,1,float16,fp8,8191,0.012548799812793731
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,2,1,128,1,float16,float16,16383,0.02258400022983551
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,2,1,128,1,float16,fp8,511,0.008548799902200699
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,2,1,128,1,float16,fp8,1023,0.008432000130414962
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,2,1,128,1,float16,fp8,2047,0.008511999994516373
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,2,1,128,1,float16,float16,4095,0.015340800583362579
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,2,1,128,1,float16,fp8,4095,0.01040479987859726
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,2,1,128,1,float16,fp8,16383,0.016595199704170227
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,2,1,128,1,float16,float16,32767,0.030876800417900085
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,2,1,128,1,float16,fp8,32767,0.024724799394607543
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,2,1,128,1,float16,float16,65535,0.03148640096187592
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,2,1,128,1,float16,fp8,65535,0.024852800369262695
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,2,1,128,1,float16,float16,131071,0.033025598526000975
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1,1,2,1,128,1,float16,fp8,131071,0.02689119875431061
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,2,1,128,1,float16,float16,1,0.016527999937534333
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,2,1,128,1,float16,fp8,1,0.008584000170230865
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,2,1,128,1,float16,float16,3,0.0165583997964859
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,2,1,128,1,float16,fp8,3,0.008337599784135818
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,2,1,128,1,float16,float16,7,0.01656000018119812
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,2,1,128,1,float16,fp8,7,0.00836160033941269
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,2,1,128,1,float16,float16,15,0.014590400457382201
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,2,1,128,1,float16,fp8,15,0.008455999940633774
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,2,1,128,1,float16,float16,31,0.016516800224781036
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,2,1,128,1,float16,fp8,31,0.00846560001373291
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,2,1,128,1,float16,float16,63,0.01653759926557541
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,2,1,128,1,float16,fp8,63,0.008478400111198426
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,2,1,128,1,float16,float16,127,0.016355200111865996
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,2,1,128,1,float16,fp8,127,0.00835999995470047
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,2,1,128,1,float16,float16,255,0.014657600224018097
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,2,1,128,1,float16,fp8,255,0.008366400003433227
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,2,1,128,1,float16,float16,511,0.016550399363040924
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,2,1,128,1,float16,fp8,511,0.008580800145864487
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,2,1,128,1,float16,float16,1023,0.016568000614643096
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,2,1,128,1,float16,fp8,1023,0.008561599999666214
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,2,1,128,1,float16,float16,2047,0.016633599996566772
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,2,1,128,1,float16,fp8,2047,0.008868800103664398
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,2,1,128,1,float16,fp8,16383,0.016627199947834015
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,2,1,128,1,float16,float16,4095,0.016622400283813475
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,2,1,128,1,float16,fp8,4095,0.010499200224876404
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,2,1,128,1,float16,float16,8191,0.01879200041294098
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,2,1,128,1,float16,fp8,8191,0.012580800056457519
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,2,1,128,1,float16,float16,16383,0.022702400386333466
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,2,1,128,1,float16,fp8,32767,0.018641600012779237
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,2,1,128,1,float16,float16,1,0.016550399363040924
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,2,1,128,1,float16,float16,32767,0.026737600564956665
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,2,1,128,1,float16,float16,3,0.01664319932460785
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,2,1,128,1,float16,float16,65535,0.02892639935016632
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,2,1,128,1,float16,fp8,65535,0.020659199357032774
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,2,1,128,1,float16,float16,131071,0.04863840043544769
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2,1,2,1,128,1,float16,fp8,131071,0.0227743998169899
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,2,1,128,1,float16,fp8,1,0.00833119973540306
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,2,1,128,1,float16,float16,7,0.016553600132465363
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,2,1,128,1,float16,fp8,31,0.008327999711036682
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,2,1,128,1,float16,fp8,3,0.008476799726486206
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,2,1,128,1,float16,fp8,7,0.008444800227880477
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,2,1,128,1,float16,float16,15,0.01655679941177368
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,2,1,128,1,float16,fp8,15,0.008374399691820144
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,2,1,128,1,float16,float16,31,0.016510400176048278
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,2,1,128,1,float16,float16,63,0.016569599509239197
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,2,1,128,1,float16,fp8,63,0.008433599770069123
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,2,1,128,1,float16,float16,127,0.016655999422073364
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,2,1,128,1,float16,fp8,127,0.008457600325345992
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,2,1,128,1,float16,float16,255,0.01664479970932007
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,2,1,128,1,float16,fp8,255,0.008420799672603608
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,2,1,128,1,float16,float16,511,0.016649599373340606
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,2,1,128,1,float16,fp8,511,0.00833439975976944
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,2,1,128,1,float16,float16,1023,0.01655679941177368
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,2,1,128,1,float16,fp8,1023,0.008353599905967712
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,2,1,128,1,float16,float16,2047,0.01860480010509491
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,2,1,128,1,float16,fp8,2047,0.009431999921798707
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,2,1,128,1,float16,float16,4095,0.02075839936733246
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,2,1,128,1,float16,float16,32767,0.09411839842796325
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,2,1,128,1,float16,fp8,4095,0.010527999699115753
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,2,1,128,1,float16,float16,8191,0.040673598647117615
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,2,1,128,1,float16,fp8,8191,0.013817599415779114
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,2,1,128,1,float16,float16,16383,0.05968160033226013
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,2,1,128,1,float16,fp8,16383,0.028772801160812378
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,2,1,128,1,float16,fp8,32767,0.04747839868068695
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,2,1,128,1,float16,float16,3,0.018563200533390046
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,2,1,128,1,float16,float16,65535,0.1667695999145508
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,2,1,128,1,float16,fp8,65535,0.08479359745979309
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,2,1,128,1,float16,float16,1,0.018673600256443025
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,2,1,128,1,float16,fp8,1,0.008644799888134002
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,2,1,128,1,float16,float16,131071,0.3102096080780029
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,32,1,2,1,128,1,float16,fp8,131071,0.15800800323486328
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,2,1,128,1,float16,fp8,3,0.008479999750852585
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,2,1,128,1,float16,float16,7,0.01855680048465729
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,2,1,128,1,float16,fp8,7,0.008473599702119828
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,2,1,128,1,float16,float16,15,0.018595199286937713
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,2,1,128,1,float16,fp8,15,0.008816000074148178
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,2,1,128,1,float16,float16,255,0.017679999768733978
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,2,1,128,1,float16,float16,31,0.01758880019187927
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,2,1,128,1,float16,fp8,31,0.008449599891901017
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,2,1,128,1,float16,float16,63,0.01863519996404648
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,2,1,128,1,float16,fp8,63,0.008393599838018417
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,2,1,128,1,float16,float16,127,0.018768000602722167
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,2,1,128,1,float16,fp8,127,0.008457600325345992
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,2,1,128,1,float16,fp8,255,0.00849440023303032
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,2,1,128,1,float16,float16,511,0.0186271995306015
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,2,1,128,1,float16,float16,1023,0.018590399622917177
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,2,1,128,1,float16,fp8,1023,0.008555199950933456
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,2,1,128,1,float16,fp8,511,0.008886399865150451
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,2,1,128,1,float16,float16,2047,0.01867839992046356
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,2,1,128,1,float16,fp8,2047,0.008617600053548813
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,2,1,128,1,float16,float16,4095,0.018795199692249298
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,2,1,128,1,float16,fp8,4095,0.010463999956846238
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,2,1,128,1,float16,float16,8191,0.022625599801540375
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,2,1,128,1,float16,fp8,8191,0.012559999525547028
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,2,1,128,1,float16,float16,16383,0.02483679950237274
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,2,1,128,1,float16,fp8,16383,0.0144896000623703
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,2,1,128,1,float16,float16,32767,0.026822400093078614
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,2,1,128,1,float16,fp8,32767,0.016476799547672272
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,2,1,128,1,float16,float16,3,0.01664479970932007
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,2,1,128,1,float16,float16,65535,0.049300798773765565
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,2,1,128,1,float16,fp8,65535,0.01857759952545166
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,2,1,128,1,float16,float16,131071,0.06559680104255676
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,2,1,128,1,float16,float16,15,0.016806399822235106
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,4,1,2,1,128,1,float16,fp8,131071,0.03500800132751465
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,2,1,128,1,float16,float16,1,0.016638399660587312
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,2,1,128,1,float16,fp8,1,0.008441600203514098
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,2,1,128,1,float16,fp8,3,0.008340799808502197
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,2,1,128,1,float16,float16,7,0.016711999475955964
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,2,1,128,1,float16,fp8,7,0.008444800227880477
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,2,1,128,1,float16,fp8,15,0.008345600217580795
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,2,1,128,1,float16,float16,31,0.018374399840831758
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,2,1,128,1,float16,fp8,31,0.008376000076532364
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,2,1,128,1,float16,float16,63,0.0166143998503685
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,2,1,128,1,float16,fp8,63,0.008339200168848038
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,2,1,128,1,float16,float16,127,0.0184688001871109
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,2,1,128,1,float16,fp8,127,0.008340799808502197
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,2,1,128,1,float16,float16,255,0.016752000153064727
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,2,1,128,1,float16,fp8,255,0.008363199979066848
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,2,1,128,1,float16,float16,511,0.018596799671649934
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,2,1,128,1,float16,fp8,511,0.008628799766302108
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,2,1,128,1,float16,float16,1023,0.020689600706100465
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,2,1,128,1,float16,fp8,1023,0.010411199927330018
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,2,1,128,1,float16,float16,2047,0.041064000129699706
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,2,1,128,1,float16,fp8,2047,0.012539200484752655
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,2,1,128,1,float16,float16,4095,0.05865439772605896
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,2,1,128,1,float16,fp8,4095,0.028033599257469177
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,2,1,128,1,float16,float16,8191,0.09458079934120178
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,2,1,128,1,float16,fp8,8191,0.04556480050086975
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,2,1,128,1,float16,float16,16383,0.1674496054649353
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,2,1,128,1,float16,fp8,16383,0.08221920132637024
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,2,1,128,1,float16,float16,1,0.01884479969739914
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,2,1,128,1,float16,float16,32767,0.30994880199432373
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,2,1,128,1,float16,fp8,32767,0.15421279668807983
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,2,1,128,1,float16,fp8,1,0.00843840017914772
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,2,1,128,1,float16,float16,3,0.018768000602722167
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,2,1,128,1,float16,fp8,3,0.008502399921417237
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,2,1,128,1,float16,float16,7,0.018680000305175783
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,2,1,128,1,float16,fp8,7,0.008640000224113464
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,2,1,128,1,float16,fp8,65535,0.30164480209350586
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,128,1,2,1,128,1,float16,float16,65535,0.5977503776550293
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,2,1,128,1,float16,float16,15,0.01995840072631836
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,2,1,128,1,float16,fp8,15,0.008452799916267396
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,2,1,128,1,float16,float16,31,0.020656000077724456
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,2,1,128,1,float16,fp8,31,0.008459199965000153
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,2,1,128,1,float16,float16,63,0.019011199474334717
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,2,1,128,1,float16,float16,511,0.02166240066289902
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,2,1,128,1,float16,fp8,63,0.008495999872684479
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,2,1,128,1,float16,fp8,127,0.008489599823951722
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,2,1,128,1,float16,float16,127,0.0186831995844841
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,2,1,128,1,float16,float16,255,0.01950560063123703
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,2,1,128,1,float16,fp8,255,0.008990400284528733
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,2,1,128,1,float16,float16,1023,0.04274719953536987
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,2,1,128,1,float16,fp8,4095,0.04692319929599762
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,2,1,128,1,float16,fp8,511,0.010564800351858139
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,2,1,128,1,float16,fp8,1023,0.01398559957742691
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,2,1,128,1,float16,float16,2047,0.06028320193290711
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,2,1,128,1,float16,fp8,2047,0.028217598795890808
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,2,1,128,1,float16,float16,4095,0.09646720290184022
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,2,1,128,1,float16,float16,8191,0.16893600225448607
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,2,1,128,1,float16,fp8,8191,0.0841488003730774
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,2,1,128,1,float16,float16,1,0.02279520034790039
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,2,1,128,1,float16,fp8,1,0.01247519999742508
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,2,1,128,1,float16,fp8,16383,0.15776959657669068
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,2,1,128,1,float16,float16,3,0.02269600033760071
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,256,1,2,1,128,1,float16,float16,16383,0.312227201461792
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,2,1,128,1,float16,fp8,3,0.01067200005054474
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,2,1,128,1,float16,float16,7,0.02312159985303879
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,2,1,128,1,float16,fp8,7,0.01244800016283989
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,2,1,128,1,float16,float16,15,0.023192000389099122
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,2,1,128,1,float16,fp8,15,0.012427199631929398
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,2,1,128,1,float16,float16,31,0.022891199588775633
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,2,1,128,1,float16,fp8,31,0.012457600235939026
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,2,1,128,1,float16,float16,63,0.023665599524974823
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,2,1,128,1,float16,fp8,63,0.010817600041627884
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,2,1,128,1,float16,float16,127,0.023231999576091768
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,2,1,128,1,float16,float16,1023,0.06320319771766662
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,2,1,128,1,float16,fp8,127,0.012460800260305405
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,2,1,128,1,float16,float16,255,0.022756800055503845
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,2,1,128,1,float16,fp8,255,0.0124208003282547
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,2,1,128,1,float16,float16,511,0.043863999843597415
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,2,1,128,1,float16,fp8,511,0.014833599328994751
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,2,1,128,1,float16,fp8,1023,0.02897599935531616
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,2,1,128,1,float16,float16,2047,0.1002992033958435
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,2,1,128,1,float16,fp8,2047,0.047307199239730834
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,2,1,128,1,float16,float16,4095,0.17336640357971192
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,2,1,128,1,float16,fp8,4095,0.08257759809494018
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,2,1,128,1,float16,float16,1,0.018563200533390046
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,2,1,128,1,float16,fp8,1,0.00843520015478134
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,2,1,128,1,float16,float16,8191,0.32133119106292723
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,512,1,2,1,128,1,float16,fp8,8191,0.15671520233154296
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,2,1,128,1,float16,fp8,3,0.008367999643087386
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,2,1,128,1,float16,float16,3,0.0186831995844841
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,2,1,128,1,float16,float16,7,0.016672000288963318
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,2,1,128,1,float16,fp8,7,0.008553600311279297
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,2,1,128,1,float16,float16,15,0.01857919991016388
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,2,1,128,1,float16,fp8,15,0.008446399867534638
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,2,1,128,1,float16,float16,31,0.017073599994182585
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,2,1,128,1,float16,fp8,31,0.008463999629020691
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,2,1,128,1,float16,float16,63,0.018588800728321076
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,2,1,128,1,float16,fp8,511,0.008398400247097015
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,2,1,128,1,float16,fp8,63,0.00835999995470047
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,2,1,128,1,float16,float16,127,0.01663520038127899
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,2,1,128,1,float16,fp8,127,0.008382400125265121
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,2,1,128,1,float16,float16,255,0.01857919991016388
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,2,1,128,1,float16,fp8,255,0.008579199761152267
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,2,1,128,1,float16,float16,511,0.016755199432373045
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,2,1,128,1,float16,float16,1023,0.018692800402641298
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,2,1,128,1,float16,fp8,1023,0.008451200276613235
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,2,1,128,1,float16,float16,2047,0.019020800292491914
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,2,1,128,1,float16,fp8,2047,0.010428799688816071
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,2,1,128,1,float16,float16,4095,0.020764799416065217
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,2,1,128,1,float16,fp8,4095,0.01064319983124733
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,2,1,128,1,float16,float16,8191,0.02239679992198944
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,2,1,128,1,float16,fp8,16383,0.013966399431228637
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,2,1,128,1,float16,fp8,8191,0.012479999661445617
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,2,1,128,1,float16,float16,32767,0.04540959894657135
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,2,1,128,1,float16,float16,16383,0.024667200446128846
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,2,1,128,1,float16,fp8,32767,0.016627199947834015
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,2,1,128,1,float16,float16,65535,0.06415200233459473
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,2,1,128,1,float16,fp8,65535,0.032923200726509096
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,2,1,128,1,float16,float16,131071,0.10044000148773194
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,8,1,2,1,128,1,float16,fp8,131071,0.051560002565383914
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,2,1,128,1,float16,float16,1,0.028193598985671996
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,2,1,128,1,float16,float16,3,0.02778719961643219
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,2,1,128,1,float16,fp8,3,0.01656160056591034
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,2,1,128,1,float16,fp8,1,0.016569599509239197
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,2,1,128,1,float16,float16,7,0.02890079915523529
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,2,1,128,1,float16,fp8,7,0.0166143998503685
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,2,1,128,1,float16,float16,15,0.028784000873565675
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,2,1,128,1,float16,fp8,15,0.01666879951953888
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,2,1,128,1,float16,fp8,127,0.016550399363040924
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,2,1,128,1,float16,float16,31,0.02781279981136322
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,2,1,128,1,float16,fp8,31,0.016540800034999848
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,2,1,128,1,float16,float16,63,0.02889919877052307
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,2,1,128,1,float16,fp8,63,0.01653279960155487
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,2,1,128,1,float16,float16,127,0.028808000683784484
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,2,1,128,1,float16,float16,1023,0.09938560128211975
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,2,1,128,1,float16,float16,255,0.043438398838043214
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,2,1,128,1,float16,fp8,255,0.016569599509239197
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,2,1,128,1,float16,float16,511,0.06209920048713684
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,2,1,128,1,float16,fp8,511,0.029099199175834655
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,2,1,128,1,float16,fp8,1023,0.0482015997171402
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,2,1,128,1,float16,fp8,2047,0.08480160236358643
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,2,1,128,1,float16,float16,2047,0.1719472050666809
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,2,1,128,1,float16,float16,1,0.037115201354026794
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,2,1,128,1,float16,fp8,1,0.024851199984550477
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,2,1,128,1,float16,float16,4095,0.31752159595489504
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,2,1,128,1,float16,float16,3,0.03707039952278137
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,1024,1,2,1,128,1,float16,fp8,4095,0.16016479730606079
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,2,1,128,1,float16,fp8,3,0.02499680072069168
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,2,1,128,1,float16,float16,7,0.037124800682067874
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,2,1,128,1,float16,fp8,7,0.024843199551105498
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,2,1,128,1,float16,float16,15,0.03895680010318756
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,2,1,128,1,float16,fp8,31,0.024855999648571013
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,2,1,128,1,float16,fp8,15,0.024979199469089507
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,2,1,128,1,float16,float16,31,0.03916000127792359
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,2,1,128,1,float16,fp8,255,0.030907198786735535
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,2,1,128,1,float16,float16,63,0.03884640038013458
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,2,1,128,1,float16,fp8,63,0.024792000651359558
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,2,1,128,1,float16,float16,127,0.047286400198936464
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,2,1,128,1,float16,fp8,127,0.024780799448490144
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,2,1,128,1,float16,float16,255,0.06334879994392395
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,2,1,128,1,float16,float16,511,0.0990768015384674
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,2,1,128,1,float16,fp8,511,0.04890719950199127
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,2,1,128,1,float16,float16,1023,0.1719391942024231
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,2,1,128,1,float16,fp8,1023,0.08590720295906067
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,2,1,128,1,float16,float16,1,0.01671680063009262
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,2,1,128,1,float16,float16,2047,0.31675519943237307
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,2,1,128,1,float16,fp8,1,0.008455999940633774
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,2048,1,2,1,128,1,float16,fp8,2047,0.15792479515075683
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,2,1,128,1,float16,float16,3,0.018512000143527985
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,2,1,128,1,float16,fp8,3,0.008488000184297562
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,2,1,128,1,float16,float16,7,0.018692800402641298
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,2,1,128,1,float16,fp8,7,0.008374399691820144
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,2,1,128,1,float16,float16,15,0.01674239933490753
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,2,1,128,1,float16,fp8,15,0.00844319984316826
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,2,1,128,1,float16,float16,31,0.018668800592422485
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,2,1,128,1,float16,fp8,31,0.008486399799585343
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,2,1,128,1,float16,float16,63,0.018596799671649934
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,2,1,128,1,float16,fp8,63,0.008668799698352814
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,2,1,128,1,float16,float16,1023,0.01818079948425293
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,2,1,128,1,float16,float16,127,0.018700799345970152
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,2,1,128,1,float16,fp8,127,0.008614400029182434
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,2,1,128,1,float16,float16,255,0.016728000342845918
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,2,1,128,1,float16,fp8,255,0.008404800295829773
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,2,1,128,1,float16,float16,511,0.01867839992046356
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,2,1,128,1,float16,float16,8191,0.02280000001192093
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,2,1,128,1,float16,fp8,511,0.008526399731636047
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,2,1,128,1,float16,fp8,1023,0.008425600081682205
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,2,1,128,1,float16,float16,2047,0.01860480010509491
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,2,1,128,1,float16,fp8,2047,0.010452800244092942
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,2,1,128,1,float16,float16,4095,0.020692799985408784
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,2,1,128,1,float16,fp8,4095,0.010824000090360641
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,2,1,128,1,float16,float16,65535,0.09812639951705933
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,2,1,128,1,float16,fp8,8191,0.01257600039243698
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,2,1,128,1,float16,float16,16383,0.042998400330543515
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,2,1,128,1,float16,fp8,16383,0.014820800721645355
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,2,1,128,1,float16,float16,32767,0.06157439947128296
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,2,1,128,1,float16,fp8,32767,0.031121599674224853
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,2,1,128,1,float16,fp8,65535,0.04960800111293793
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,2,1,128,1,float16,fp8,131071,0.08633279800415039
SGLang,0.5.6.post2,NVIDIA B200,generation_attention,trtllm_mha,16,1,2,1,128,1,float16,float16,131071,0.1703328013420105
