framework,version,device,op_name,kernel_source,batch_size,isl,num_heads,num_key_value_heads,head_dim,beam_width,attn_dtype,kv_cache_dtype,step,latency
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,96,1,128,1,float16,float16,0,44.14948425292969
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,96,1,128,1,fp8,fp8,0,34.86885375976563
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,96,1,128,1,float16,fp8,0,34.97383422851563
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,96,2,128,1,float16,float16,0,44.189794921875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,96,2,128,1,fp8,fp8,0,35.002020263671874
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,96,2,128,1,float16,fp8,0,34.93772583007812
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,96,4,128,1,float16,float16,0,44.84974060058594
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,96,4,128,1,float16,fp8,0,35.02408142089844
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,96,4,128,1,fp8,fp8,0,34.91069641113281
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,96,8,128,1,float16,fp8,0,35.251626586914064
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,96,96,128,1,float16,fp8,0,18.182453918457032
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,96,1,128,1,float16,fp8,0,17.3759765625
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,96,8,128,1,float16,float16,0,45.42595520019531
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,96,1,128,1,float16,float16,0,21.96129608154297
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,96,96,128,1,fp8,fp8,0,18.081402587890626
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,96,96,128,1,float16,float16,0,28.641403198242188
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,96,1,128,1,fp8,fp8,0,17.649842834472658
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,96,2,128,1,float16,fp8,0,17.60649719238281
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,96,8,128,1,fp8,fp8,0,34.935055541992185
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,96,2,128,1,fp8,fp8,0,17.488404846191408
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,96,2,128,1,float16,float16,0,22.124766540527343
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,96,4,128,1,float16,fp8,0,17.625889587402344
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,96,4,128,1,fp8,fp8,0,17.281031799316406
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,96,4,128,1,float16,float16,0,22.32470703125
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,96,96,128,1,fp8,fp8,0,9.20406265258789
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,96,96,128,1,float16,fp8,0,9.258417510986328
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,96,8,128,1,float16,fp8,0,17.600440979003906
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,96,96,128,1,float16,float16,0,14.412477111816406
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,96,1,128,1,float16,float16,0,11.436697387695313
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,96,8,128,1,fp8,fp8,0,17.505636596679686
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,96,1,128,1,float16,fp8,0,8.649646759033203
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,96,8,128,1,float16,float16,0,22.569415283203124
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,96,1,128,1,fp8,fp8,0,8.781079864501953
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,96,2,128,1,float16,fp8,0,8.752267456054687
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,96,2,128,1,float16,float16,0,10.968447875976562
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,96,2,128,1,fp8,fp8,0,8.665620422363281
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,96,4,128,1,fp8,fp8,0,8.689447784423828
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,96,4,128,1,float16,fp8,0,8.720998382568359
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,96,4,128,1,float16,float16,0,10.97301788330078
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,96,96,128,1,float16,fp8,0,4.550167846679687
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,96,96,128,1,fp8,fp8,0,4.6274574279785154
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,96,8,128,1,fp8,fp8,0,8.803337860107423
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,96,8,128,1,float16,fp8,0,8.828620910644531
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,96,96,128,1,float16,float16,0,7.137556457519532
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,96,1,128,1,float16,float16,0,5.528054428100586
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,96,8,128,1,float16,float16,0,11.41380615234375
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,96,1,128,1,float16,fp8,0,4.511000061035157
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,96,1,128,1,fp8,fp8,0,4.290849685668945
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,96,2,128,1,float16,fp8,0,4.256289672851563
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,96,2,128,1,fp8,fp8,0,4.335692977905273
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,96,2,128,1,float16,float16,0,5.478844833374024
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,96,4,128,1,float16,fp8,0,4.63709602355957
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,96,4,128,1,float16,float16,0,5.2403312683105465
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,96,4,128,1,fp8,fp8,0,4.631628799438476
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,96,8,128,1,float16,float16,0,5.683380889892578
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,96,8,128,1,float16,fp8,0,4.321015930175781
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,96,8,128,1,fp8,fp8,0,4.3490558624267575
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,96,1,128,1,fp8,fp8,0,20.013299560546876
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,96,1,128,1,float16,fp8,0,20.13774871826172
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,96,2,128,1,float16,fp8,0,20.35187683105469
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,96,2,128,1,fp8,fp8,0,20.007598876953125
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,96,4,128,1,float16,fp8,0,20.56951141357422
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,96,1,128,1,float16,float16,0,25.687890625
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,96,2,128,1,float16,float16,0,25.797366333007812
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,96,4,128,1,float16,float16,0,25.7958984375
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,96,96,128,1,float16,fp8,0,10.62801742553711
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,96,96,128,1,fp8,fp8,0,10.543915557861329
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,96,4,128,1,fp8,fp8,0,20.059278869628905
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,96,1,128,1,float16,float16,0,12.865823364257812
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,96,8,128,1,float16,fp8,0,20.400453186035158
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,96,8,128,1,fp8,fp8,0,20.385740661621092
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,96,1,128,1,float16,fp8,0,10.130254364013672
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,96,96,128,1,float16,float16,0,17.366517639160158
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,96,8,128,1,float16,float16,0,26.38804931640625
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,96,1,128,1,fp8,fp8,0,10.094297790527344
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,96,2,128,1,float16,fp8,0,10.04211654663086
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,96,2,128,1,fp8,fp8,0,10.09378890991211
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,96,2,128,1,float16,float16,0,12.790068817138671
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,96,4,128,1,float16,float16,0,13.031288146972656
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,96,4,128,1,float16,fp8,0,10.204020690917968
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,96,4,128,1,fp8,fp8,0,10.106803131103515
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,96,8,128,1,float16,fp8,0,10.201569366455079
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,96,96,128,1,float16,fp8,0,5.297987365722657
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,96,8,128,1,float16,float16,0,12.981828308105468
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,96,8,128,1,fp8,fp8,0,10.231590270996094
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,96,96,128,1,float16,float16,0,8.774205017089844
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,96,96,128,1,fp8,fp8,0,5.515094375610351
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,96,1,128,1,float16,fp8,0,4.983472061157227
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,96,1,128,1,float16,float16,0,6.369384002685547
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,96,1,128,1,fp8,fp8,0,5.019473648071289
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,96,2,128,1,float16,float16,0,6.224190521240234
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,96,2,128,1,float16,fp8,0,5.106044769287109
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,96,2,128,1,fp8,fp8,0,5.096004867553711
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,96,4,128,1,float16,fp8,0,5.028387069702148
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,96,4,128,1,fp8,fp8,0,4.989270401000977
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,96,4,128,1,float16,float16,0,6.341798400878906
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,96,96,128,1,float16,fp8,0,2.7364736557006837
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,96,8,128,1,float16,fp8,0,5.0506446838378904
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,96,8,128,1,float16,float16,0,6.285891342163086
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,96,96,128,1,fp8,fp8,0,2.6892816543579103
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,96,1,128,1,float16,float16,0,3.1077632904052734
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,96,96,128,1,float16,float16,0,4.329719924926758
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,96,8,128,1,fp8,fp8,0,5.0907024383544925
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,96,1,128,1,float16,fp8,0,2.69860954284668
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,96,1,128,1,fp8,fp8,0,2.533940887451172
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,96,2,128,1,float16,float16,0,2.9298864364624024
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,96,2,128,1,float16,fp8,0,2.7046863555908205
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,96,4,128,1,float16,float16,0,2.950943946838379
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,96,2,128,1,fp8,fp8,0,3.260163116455078
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,96,4,128,1,float16,fp8,0,2.5557647705078126
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,96,4,128,1,fp8,fp8,0,2.503497505187988
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,96,8,128,1,float16,fp8,0,2.8243488311767577
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,96,8,128,1,float16,float16,0,3.113256072998047
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,96,8,128,1,fp8,fp8,0,2.4822223663330076
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,96,1,128,1,float16,fp8,0,14.19390869140625
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,96,1,128,1,fp8,fp8,0,14.221113586425782
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,96,2,128,1,float16,fp8,0,14.31580810546875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,96,4,128,1,float16,fp8,0,14.02698211669922
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,96,2,128,1,fp8,fp8,0,14.135768127441406
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,96,1,128,1,float16,float16,0,18.176106262207032
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,96,2,128,1,float16,float16,0,18.282373046875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,96,4,128,1,float16,float16,0,18.269635009765626
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,96,96,128,1,float16,fp8,0,7.819356536865234
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,96,96,128,1,fp8,fp8,0,7.5955970764160154
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,96,4,128,1,fp8,fp8,0,14.63031005859375
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,96,1,128,1,float16,float16,0,8.842641448974609
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,96,8,128,1,float16,fp8,0,14.429930114746094
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,96,96,128,1,float16,float16,0,13.098660278320313
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,96,8,128,1,fp8,fp8,0,14.541975402832032
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,96,8,128,1,float16,float16,0,18.706614685058593
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,96,1,128,1,float16,fp8,0,7.087586975097656
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,96,1,128,1,fp8,fp8,0,7.099529266357422
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,96,2,128,1,fp8,fp8,0,7.139803314208985
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,96,2,128,1,float16,float16,0,9.217046356201172
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,96,2,128,1,float16,fp8,0,7.277079772949219
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,96,4,128,1,float16,fp8,0,7.1754707336425785
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,96,4,128,1,float16,float16,0,9.194420623779298
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,96,4,128,1,fp8,fp8,0,7.078467559814453
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,96,8,128,1,float16,fp8,0,7.277088165283203
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,96,96,128,1,float16,fp8,0,3.799113464355469
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,96,96,128,1,fp8,fp8,0,3.8258304595947266
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,96,8,128,1,float16,float16,0,9.42510757446289
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,96,1,128,1,float16,fp8,0,3.4793663024902344
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,96,96,128,1,float16,float16,0,6.622809600830078
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,96,1,128,1,float16,float16,0,4.391635131835938
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,96,8,128,1,fp8,fp8,0,7.091320037841797
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,96,1,128,1,fp8,fp8,0,3.568217468261719
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,96,2,128,1,float16,fp8,0,3.515633773803711
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,96,2,128,1,float16,float16,0,4.431646347045898
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,96,2,128,1,fp8,fp8,0,3.6093231201171876
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,96,4,128,1,float16,float16,0,4.233011245727539
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,96,4,128,1,float16,fp8,0,3.5341327667236326
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,96,8,128,1,float16,fp8,0,3.5858367919921874
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,96,96,128,1,float16,fp8,0,1.9075935363769532
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,96,4,128,1,fp8,fp8,0,4.176401519775391
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,96,8,128,1,float16,float16,0,4.466704177856445
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,96,8,128,1,fp8,fp8,0,3.5861297607421876
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,96,96,128,1,float16,float16,0,3.4771167755126955
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,96,1,128,1,float16,fp8,0,1.8012224197387696
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,96,1,128,1,float16,float16,0,2.043025588989258
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,96,96,128,1,fp8,fp8,0,2.4477840423583985
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,96,1,128,1,fp8,fp8,0,1.7575344085693358
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,96,2,128,1,fp8,fp8,0,1.815283203125
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,96,2,128,1,float16,float16,0,2.0511056900024416
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,96,4,128,1,float16,float16,0,2.1039072036743165
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,96,2,128,1,float16,fp8,0,1.970952033996582
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,96,4,128,1,float16,fp8,0,1.801633644104004
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,96,8,128,1,fp8,fp8,0,1.7840511322021484
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,96,4,128,1,fp8,fp8,0,2.1756591796875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,96,8,128,1,float16,fp8,0,1.7566991806030274
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,96,8,128,1,float16,float16,0,2.158345603942871
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,96,1,128,1,fp8,fp8,0,18.6896484375
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,96,1,128,1,float16,fp8,0,18.79364013671875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,96,2,128,1,float16,fp8,0,18.869181823730468
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,96,2,128,1,fp8,fp8,0,18.800732421875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,96,4,128,1,float16,fp8,0,18.54741516113281
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,96,1,128,1,float16,float16,0,23.642764282226562
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,96,2,128,1,float16,float16,0,24.004421997070313
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,96,4,128,1,float16,float16,0,24.1417724609375
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,96,96,128,1,float16,fp8,0,10.130433654785156
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,96,96,128,1,fp8,fp8,0,10.22295684814453
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,96,1,128,1,float16,float16,0,11.814339447021485
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,96,4,128,1,fp8,fp8,0,18.83659973144531
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,96,96,128,1,float16,float16,0,18.208038330078125
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,96,8,128,1,float16,fp8,0,18.938946533203126
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,96,8,128,1,fp8,fp8,0,18.952490234375
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,96,1,128,1,float16,fp8,0,9.359095764160156
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,96,8,128,1,float16,float16,0,24.903370666503907
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,96,1,128,1,fp8,fp8,0,9.292403411865234
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,96,2,128,1,float16,fp8,0,9.348331451416016
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,96,2,128,1,float16,float16,0,12.000870513916016
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,96,2,128,1,fp8,fp8,0,9.18865737915039
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,96,4,128,1,float16,fp8,0,9.506366729736328
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,96,4,128,1,float16,float16,0,12.273168182373047
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,96,4,128,1,fp8,fp8,0,9.44262237548828
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,96,8,128,1,float16,fp8,0,9.473729705810547
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,96,96,128,1,float16,fp8,0,5.027556610107422
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,96,96,128,1,fp8,fp8,0,5.03691520690918
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,96,8,128,1,float16,float16,0,12.337006378173829
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,96,1,128,1,float16,fp8,0,4.656161499023438
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,96,8,128,1,fp8,fp8,0,9.459270477294922
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,96,1,128,1,float16,float16,0,5.82322883605957
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,96,96,128,1,float16,float16,0,9.144866943359375
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,96,1,128,1,fp8,fp8,0,4.632820892333984
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,96,2,128,1,float16,fp8,0,4.6916351318359375
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,96,2,128,1,float16,float16,0,5.8702350616455075
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,96,2,128,1,fp8,fp8,0,4.844313430786133
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,96,4,128,1,float16,float16,0,5.901603317260742
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,96,4,128,1,float16,fp8,0,4.761235046386719
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,96,4,128,1,fp8,fp8,0,4.6183921813964846
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,96,8,128,1,float16,fp8,0,4.688576126098633
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,96,96,128,1,float16,float16,0,4.5486400604248045
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,96,8,128,1,float16,float16,0,6.014689636230469
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,96,96,128,1,fp8,fp8,0,2.468891143798828
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,96,8,128,1,fp8,fp8,0,5.29162712097168
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,96,96,128,1,float16,fp8,0,3.095529556274414
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,96,1,128,1,float16,fp8,0,2.352097511291504
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,96,1,128,1,float16,float16,0,2.7246784210205077
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,96,1,128,1,fp8,fp8,0,2.3537439346313476
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,96,2,128,1,fp8,fp8,0,2.346971130371094
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,96,2,128,1,float16,float16,0,2.7561920166015623
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,96,4,128,1,float16,fp8,0,2.309172821044922
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,96,4,128,1,fp8,fp8,0,2.3235071182250975
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,96,2,128,1,float16,fp8,0,2.733590316772461
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,96,4,128,1,float16,float16,0,2.756795120239258
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,96,8,128,1,float16,float16,0,2.938483238220215
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,96,8,128,1,float16,fp8,0,2.332423973083496
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,96,96,128,1,float16,fp8,0,1.292849636077881
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,96,96,128,1,fp8,fp8,0,1.3161024093627929
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,96,1,128,1,float16,float16,0,1.3749391555786132
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,96,1,128,1,float16,fp8,0,1.4416560173034667
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,96,8,128,1,fp8,fp8,0,2.30664005279541
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,96,1,128,1,fp8,fp8,0,1.3759552001953126
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,96,96,128,1,float16,float16,0,2.7413232803344725
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,96,2,128,1,float16,float16,0,1.3681471824645997
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,96,2,128,1,float16,fp8,0,1.174623966217041
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,96,2,128,1,fp8,fp8,0,1.2088352203369142
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,96,4,128,1,float16,float16,0,1.4675680160522462
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,96,4,128,1,fp8,fp8,0,1.1781375885009766
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,96,4,128,1,float16,fp8,0,1.3784111976623534
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,96,8,128,1,float16,float16,0,1.4214320182800293
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,96,8,128,1,fp8,fp8,0,1.3921392440795899
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,96,8,128,1,float16,fp8,0,1.2226176261901855
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,96,1,128,1,float16,fp8,0,11.103336334228516
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,96,2,128,1,float16,fp8,0,10.793106842041016
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,96,1,128,1,fp8,fp8,0,10.88680648803711
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,96,2,128,1,fp8,fp8,0,10.924231719970702
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,96,4,128,1,float16,fp8,0,10.938180541992187
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,96,1,128,1,float16,float16,0,13.572923278808593
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,96,2,128,1,float16,float16,0,14.052061462402344
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,96,4,128,1,float16,float16,0,14.323944091796875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,96,96,128,1,float16,fp8,0,6.510084533691407
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,96,96,128,1,fp8,fp8,0,6.305502319335938
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,96,4,128,1,fp8,fp8,0,11.144051361083985
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,96,1,128,1,float16,float16,0,7.011287689208984
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,96,8,128,1,float16,fp8,0,11.154408264160157
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,96,8,128,1,fp8,fp8,0,11.1673583984375
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,96,96,128,1,float16,float16,0,11.82799835205078
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,96,8,128,1,float16,float16,0,14.615960693359375
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,96,1,128,1,float16,fp8,0,5.393175888061523
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,96,1,128,1,fp8,fp8,0,5.38158073425293
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,96,2,128,1,float16,fp8,0,5.437940979003907
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,96,2,128,1,fp8,fp8,0,5.482952117919922
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,96,2,128,1,float16,float16,0,6.843315124511719
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,96,4,128,1,float16,float16,0,7.139435577392578
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,96,4,128,1,float16,fp8,0,5.430532836914063
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,96,8,128,1,float16,fp8,0,5.460564804077149
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,96,4,128,1,fp8,fp8,0,6.007494354248047
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,96,96,128,1,float16,fp8,0,3.2145614624023438
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,96,8,128,1,float16,float16,0,7.113549041748047
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,96,8,128,1,fp8,fp8,0,5.493020629882812
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,96,96,128,1,fp8,fp8,0,3.2767024993896485
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,96,96,128,1,float16,float16,0,5.877595138549805
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,96,1,128,1,float16,float16,0,3.194980812072754
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,96,1,128,1,float16,fp8,0,2.704840087890625
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,96,1,128,1,fp8,fp8,0,2.731705665588379
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,96,2,128,1,float16,float16,0,3.2418544769287108
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,96,2,128,1,float16,fp8,0,3.4335086822509764
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,96,2,128,1,fp8,fp8,0,2.7811071395874025
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,96,4,128,1,float16,fp8,0,2.695670318603516
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,96,4,128,1,float16,float16,0,3.4209632873535156
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,96,8,128,1,float16,fp8,0,2.6799087524414062
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,96,4,128,1,fp8,fp8,0,3.257904052734375
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,96,96,128,1,float16,fp8,0,1.539027214050293
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,96,8,128,1,float16,float16,0,3.5201137542724608
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,96,96,128,1,fp8,fp8,0,1.5656352043151855
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,96,1,128,1,float16,fp8,0,1.3674240112304688
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,96,1,128,1,float16,float16,0,1.8286527633666991
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,96,1,128,1,fp8,fp8,0,1.3979167938232422
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,96,8,128,1,fp8,fp8,0,2.8374847412109374
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,96,96,128,1,float16,float16,0,2.944163131713867
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,96,2,128,1,float16,float16,0,1.5840288162231446
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,96,2,128,1,float16,fp8,0,1.7310400009155273
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,96,2,128,1,fp8,fp8,0,1.3588064193725586
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,96,4,128,1,fp8,fp8,0,1.3721952438354492
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,96,96,128,1,float16,fp8,0,0.7810991764068603
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,96,4,128,1,float16,float16,0,1.6277023315429688
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,96,8,128,1,float16,fp8,0,1.3556719779968263
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,96,4,128,1,float16,fp8,0,1.6917007446289063
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,96,8,128,1,fp8,fp8,0,1.5532896041870117
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,96,8,128,1,float16,float16,0,1.6690767288208008
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,96,96,128,1,float16,float16,0,1.6609247207641602
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,96,96,128,1,fp8,fp8,0,0.7859519958496094
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,96,1,128,1,float16,float16,0,0.8051648139953613
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,96,1,128,1,fp8,fp8,0,0.7058063983917237
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,96,1,128,1,float16,fp8,0,0.8193280220031738
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,96,2,128,1,fp8,fp8,0,0.7196208000183105
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,96,2,128,1,float16,float16,0,0.8248592376708984
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,96,4,128,1,float16,float16,0,0.831503963470459
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,96,4,128,1,float16,fp8,0,0.7346960067749023
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,96,2,128,1,float16,fp8,0,0.7866735935211182
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,96,8,128,1,float16,fp8,0,0.7060991764068604
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,96,4,128,1,fp8,fp8,0,0.7058176040649414
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,96,8,128,1,float16,float16,0,0.8739423751831055
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,96,8,128,1,fp8,fp8,0,0.7438447952270508
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,96,1,128,1,float16,fp8,0,10.383550262451172
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,96,1,128,1,fp8,fp8,0,10.32203369140625
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,96,2,128,1,float16,fp8,0,10.302342224121094
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,96,2,128,1,fp8,fp8,0,10.295489501953124
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,96,1,128,1,float16,float16,0,13.025810241699219
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,96,2,128,1,float16,float16,0,12.991233825683594
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,96,4,128,1,float16,float16,0,13.198478698730469
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,96,4,128,1,float16,fp8,0,10.33807373046875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,96,96,128,1,float16,fp8,0,6.084635162353516
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,96,96,128,1,fp8,fp8,0,6.099294281005859
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,96,1,128,1,float16,float16,0,6.445783996582032
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,96,4,128,1,fp8,fp8,0,10.497774505615235
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,96,8,128,1,float16,fp8,0,10.581829071044922
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,96,8,128,1,fp8,fp8,0,10.588323211669922
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,96,8,128,1,float16,float16,0,14.312991333007812
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,96,1,128,1,float16,fp8,0,5.173116683959961
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,96,96,128,1,float16,float16,0,13.289399719238281
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,96,1,128,1,fp8,fp8,0,5.157400131225586
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,96,2,128,1,fp8,fp8,0,5.15827522277832
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,96,2,128,1,float16,fp8,0,5.170987319946289
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,96,2,128,1,float16,float16,0,6.6781455993652346
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,96,4,128,1,float16,float16,0,6.609500885009766
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,96,4,128,1,fp8,fp8,0,5.189276885986328
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,96,4,128,1,float16,fp8,0,5.330332946777344
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,96,8,128,1,float16,fp8,0,5.215353775024414
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,96,96,128,1,float16,fp8,0,3.571755218505859
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,96,96,128,1,fp8,fp8,0,3.251385498046875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,96,8,128,1,fp8,fp8,0,5.240553665161133
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,96,8,128,1,float16,float16,0,6.875193786621094
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,96,1,128,1,float16,float16,0,2.9523391723632812
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,96,1,128,1,float16,fp8,0,2.5877872467041017
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,96,96,128,1,float16,float16,0,6.582279968261719
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,96,1,128,1,fp8,fp8,0,2.6096895217895506
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,96,2,128,1,float16,fp8,0,2.6213119506835936
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,96,2,128,1,float16,float16,0,3.011960029602051
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,96,2,128,1,fp8,fp8,0,2.603887939453125
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,96,4,128,1,fp8,fp8,0,2.567555236816406
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,96,4,128,1,float16,float16,0,3.3323265075683595
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,96,4,128,1,float16,fp8,0,3.0438207626342773
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,96,96,128,1,float16,fp8,0,1.505116844177246
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,96,8,128,1,float16,fp8,0,2.6073680877685548
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,96,8,128,1,fp8,fp8,0,2.5759408950805662
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,96,96,128,1,fp8,fp8,0,1.5378992080688476
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,96,8,128,1,float16,float16,0,3.3614479064941407
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,96,1,128,1,float16,fp8,0,1.3611743927001954
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,96,1,128,1,float16,float16,0,1.6170976638793946
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,96,1,128,1,fp8,fp8,0,1.3083231925964356
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,96,2,128,1,float16,float16,0,1.4904512405395507
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,96,96,128,1,float16,float16,0,3.7564640045166016
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,96,2,128,1,float16,fp8,0,1.3146528244018554
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,96,2,128,1,fp8,fp8,0,1.2953056335449218
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,96,4,128,1,float16,fp8,0,1.3261712074279786
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,96,4,128,1,float16,float16,0,1.67620792388916
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,96,4,128,1,fp8,fp8,0,1.4846816062927246
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,96,8,128,1,float16,float16,0,1.5973376274108886
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,96,96,128,1,float16,fp8,0,0.7660975933074952
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,96,96,128,1,fp8,fp8,0,0.7686240196228027
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,96,8,128,1,float16,fp8,0,1.436580753326416
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,96,8,128,1,fp8,fp8,0,1.3014623641967773
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,96,1,128,1,float16,float16,0,0.8534735679626465
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,96,1,128,1,float16,fp8,0,0.7010591983795166
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,96,1,128,1,fp8,fp8,0,0.8257519721984863
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,96,96,128,1,float16,float16,0,1.6760927200317384
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,96,2,128,1,float16,float16,0,0.7639408111572266
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,96,2,128,1,float16,fp8,0,0.6672863960266113
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,96,2,128,1,fp8,fp8,0,0.7094848155975342
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,96,4,128,1,float16,float16,0,0.7823696136474609
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,96,4,128,1,float16,fp8,0,0.7143424034118653
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,96,8,128,1,float16,float16,0,0.8469280242919922
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,96,8,128,1,float16,fp8,0,0.667521619796753
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,96,4,128,1,fp8,fp8,0,0.6710319995880127
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,96,8,128,1,fp8,fp8,0,0.6686511993408203
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,96,96,128,1,float16,fp8,0,0.4070303916931152
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,96,96,128,1,fp8,fp8,0,0.4042384147644043
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,96,96,128,1,float16,float16,0,0.862451171875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,96,1,128,1,float16,float16,0,0.39742240905761717
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,96,1,128,1,float16,fp8,0,0.3705904006958008
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,96,1,128,1,fp8,fp8,0,0.3545072078704834
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,96,2,128,1,float16,float16,0,0.40357117652893065
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,96,2,128,1,float16,fp8,0,0.3554464101791382
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,96,2,128,1,fp8,fp8,0,0.35505759716033936
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,96,4,128,1,float16,float16,0,0.416428804397583
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,96,4,128,1,float16,fp8,0,0.3546191930770874
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,96,4,128,1,fp8,fp8,0,0.35726079940795896
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,96,8,128,1,float16,float16,0,0.43427839279174807
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,96,8,128,1,float16,fp8,0,0.3549072027206421
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,96,8,128,1,fp8,fp8,0,0.3551631927490234
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,96,1,128,1,float16,fp8,0,6.151243209838867
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,96,1,128,1,fp8,fp8,0,6.165063858032227
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,96,2,128,1,float16,fp8,0,6.207099151611328
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,96,2,128,1,fp8,fp8,0,6.157270431518555
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,96,1,128,1,float16,float16,0,7.533324432373047
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,96,2,128,1,float16,float16,0,7.722316741943359
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,96,4,128,1,float16,float16,0,8.076338958740234
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,96,4,128,1,float16,fp8,0,6.1921344757080075
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,96,96,128,1,float16,fp8,0,3.8451038360595704
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,96,4,128,1,fp8,fp8,0,6.20675048828125
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,96,96,128,1,fp8,fp8,0,3.8328399658203125
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,96,8,128,1,float16,fp8,0,6.195284652709961
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,96,8,128,1,fp8,fp8,0,6.4477073669433596
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,96,8,128,1,float16,float16,0,8.45113296508789
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,96,1,128,1,float16,fp8,0,3.0881168365478517
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,96,96,128,1,float16,float16,0,8.949795532226563
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,96,1,128,1,fp8,fp8,0,3.1275823593139647
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,96,1,128,1,float16,float16,0,3.7637680053710936
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,96,2,128,1,float16,fp8,0,3.1090959548950194
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,96,2,128,1,float16,float16,0,3.8486351013183593
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,96,2,128,1,fp8,fp8,0,3.1346832275390626
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,96,4,128,1,float16,float16,0,4.012307357788086
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,96,4,128,1,float16,fp8,0,3.0913711547851563
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,96,4,128,1,fp8,fp8,0,3.0734800338745116
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,96,96,128,1,float16,fp8,0,1.8866512298583984
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,96,8,128,1,fp8,fp8,0,3.1206687927246093
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,96,8,128,1,float16,fp8,0,3.56988639831543
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,96,8,128,1,float16,float16,0,4.171903991699219
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,96,1,128,1,float16,float16,0,1.7845071792602538
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,96,1,128,1,float16,fp8,0,1.5789711952209473
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,96,1,128,1,fp8,fp8,0,1.5546367645263672
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,96,96,128,1,fp8,fp8,0,2.372758483886719
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,96,2,128,1,float16,fp8,0,1.5781935691833495
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,96,2,128,1,float16,float16,0,1.7750560760498046
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,96,96,128,1,float16,float16,0,4.902900695800781
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,96,2,128,1,fp8,fp8,0,1.7984975814819335
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,96,4,128,1,float16,fp8,0,1.5503999710083007
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,96,4,128,1,float16,float16,0,1.9498479843139649
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,96,96,128,1,float16,fp8,0,0.9735504150390625
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,96,4,128,1,fp8,fp8,0,1.5495136260986329
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,96,8,128,1,float16,fp8,0,1.5562864303588868
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,96,8,128,1,float16,float16,0,2.1718208312988283
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,96,96,128,1,fp8,fp8,0,0.9387776374816894
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,96,1,128,1,float16,float16,0,0.9306464195251465
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,96,8,128,1,fp8,fp8,0,1.6660608291625976
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,96,1,128,1,float16,fp8,0,0.8060959815979004
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,96,96,128,1,float16,float16,0,2.272412872314453
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,96,1,128,1,fp8,fp8,0,0.7923295974731446
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,96,2,128,1,float16,fp8,0,0.8187904357910156
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,96,2,128,1,fp8,fp8,0,0.7924672126770019
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,96,2,128,1,float16,float16,0,0.9053119659423828
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,96,4,128,1,float16,fp8,0,0.8093968391418457
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,96,4,128,1,float16,float16,0,1.0526576042175293
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,96,4,128,1,fp8,fp8,0,0.7939951896667481
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,96,8,128,1,float16,fp8,0,0.7921999931335449
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,96,8,128,1,float16,float16,0,0.9965951919555665
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,96,96,128,1,float16,fp8,0,0.500273609161377
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,96,96,128,1,fp8,fp8,0,0.4863935947418213
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,96,8,128,1,fp8,fp8,0,0.8828495979309082
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,96,1,128,1,float16,float16,0,0.53297119140625
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,96,1,128,1,float16,fp8,0,0.4140960216522217
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,96,96,128,1,float16,float16,0,1.1534879684448243
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,96,2,128,1,fp8,fp8,0,0.4121103763580322
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,96,1,128,1,fp8,fp8,0,0.41330561637878416
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,96,2,128,1,float16,float16,0,0.48418560028076174
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,96,2,128,1,float16,fp8,0,0.4152560234069824
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,96,4,128,1,float16,float16,0,0.49033122062683104
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,96,4,128,1,float16,fp8,0,0.46386241912841797
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,96,4,128,1,fp8,fp8,0,0.41369118690490725
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,96,8,128,1,float16,float16,0,0.5095967769622802
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,96,8,128,1,float16,fp8,0,0.4127376079559326
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,96,8,128,1,fp8,fp8,0,0.4143439769744873
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,96,96,128,1,float16,fp8,0,0.26113440990448
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,96,96,128,1,fp8,fp8,0,0.26262240409851073
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,96,96,128,1,float16,float16,0,0.6219136238098144
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,96,1,128,1,float16,float16,0,0.25583999156951903
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,96,1,128,1,float16,fp8,0,0.2228480100631714
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,96,1,128,1,fp8,fp8,0,0.22358880043029786
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,96,2,128,1,float16,float16,0,0.25611200332641604
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,96,2,128,1,float16,fp8,0,0.22491519451141356
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,96,2,128,1,fp8,fp8,0,0.22388319969177245
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,96,4,128,1,float16,fp8,0,0.22852640151977538
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,96,4,128,1,fp8,fp8,0,0.2241312026977539
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,96,4,128,1,float16,float16,0,0.26376159191131593
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,96,8,128,1,float16,float16,0,0.2773263931274414
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,96,8,128,1,float16,fp8,0,0.22410240173339843
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,96,8,128,1,fp8,fp8,0,0.22395520210266112
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,96,1,128,1,float16,fp8,0,6.196646499633789
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,96,1,128,1,fp8,fp8,0,6.2128032684326175
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,96,2,128,1,float16,fp8,0,6.200171279907226
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,96,2,128,1,fp8,fp8,0,6.187601470947266
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,96,4,128,1,float16,fp8,0,6.187918472290039
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,96,1,128,1,float16,float16,0,7.615487670898437
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,96,2,128,1,float16,float16,0,7.7169921875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,96,4,128,1,float16,float16,0,7.961940765380859
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,96,96,128,1,float16,fp8,0,4.019527816772461
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,96,96,128,1,fp8,fp8,0,4.085214233398437
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,96,1,128,1,float16,float16,0,3.639836883544922
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,96,4,128,1,fp8,fp8,0,6.200751876831054
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,96,8,128,1,float16,fp8,0,6.22532958984375
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,96,8,128,1,fp8,fp8,0,6.2333824157714846
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,96,8,128,1,float16,float16,0,8.520767974853516
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,96,1,128,1,float16,fp8,0,3.1118640899658203
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,96,1,128,1,fp8,fp8,0,3.111950492858887
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,96,2,128,1,float16,fp8,0,3.129128074645996
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,96,2,128,1,fp8,fp8,0,3.123174476623535
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,96,2,128,1,float16,float16,0,3.812339019775391
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,96,96,128,1,float16,float16,0,10.785678100585937
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,96,4,128,1,float16,fp8,0,3.148833656311035
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,96,4,128,1,fp8,fp8,0,3.1084144592285154
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,96,4,128,1,float16,float16,0,4.057676696777344
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,96,8,128,1,float16,fp8,0,3.146015930175781
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,96,8,128,1,fp8,fp8,0,3.1248144149780273
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,96,96,128,1,float16,fp8,0,1.9465696334838867
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,96,8,128,1,float16,float16,0,4.1640369415283205
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,96,96,128,1,fp8,fp8,0,1.9980207443237306
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,96,1,128,1,float16,fp8,0,1.9460767745971679
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,96,1,128,1,float16,float16,0,1.7390127182006836
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,96,1,128,1,fp8,fp8,0,1.58296480178833
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,96,2,128,1,float16,float16,0,1.8118671417236327
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,96,2,128,1,float16,fp8,0,1.5616031646728517
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,96,2,128,1,fp8,fp8,0,1.6754911422729493
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,96,96,128,1,float16,float16,0,5.801481628417969
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,96,4,128,1,float16,float16,0,1.9238704681396483
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,96,4,128,1,fp8,fp8,0,1.691166305541992
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,96,4,128,1,float16,fp8,0,1.9053247451782227
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,96,8,128,1,fp8,fp8,0,1.561963176727295
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,96,8,128,1,float16,float16,0,2.07861270904541
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,96,96,128,1,float16,fp8,0,0.9892368316650391
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,96,8,128,1,float16,fp8,0,1.8954975128173828
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,96,96,128,1,fp8,fp8,0,1.1733695983886718
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,96,1,128,1,float16,float16,0,0.884665584564209
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,96,1,128,1,float16,fp8,0,0.7981184005737305
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,96,1,128,1,fp8,fp8,0,0.793609619140625
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,96,96,128,1,float16,float16,0,2.7325279235839846
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,96,2,128,1,float16,fp8,0,0.7948575973510742
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,96,2,128,1,float16,float16,0,0.9812335968017578
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,96,2,128,1,fp8,fp8,0,0.8627599716186524
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,96,4,128,1,float16,fp8,0,0.7961967945098877
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,96,4,128,1,fp8,fp8,0,0.795084810256958
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,96,4,128,1,float16,float16,0,0.9752400398254395
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,96,8,128,1,float16,float16,0,1.0150752067565918
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,96,8,128,1,float16,fp8,0,0.7956592082977295
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,96,8,128,1,fp8,fp8,0,0.7956384181976318
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,96,96,128,1,float16,fp8,0,0.5091856002807618
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,96,96,128,1,fp8,fp8,0,0.5082528114318847
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,96,1,128,1,float16,float16,0,0.4565904140472412
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,96,1,128,1,float16,fp8,0,0.4608816146850586
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,96,1,128,1,fp8,fp8,0,0.43013601303100585
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,96,2,128,1,float16,float16,0,0.46077117919921873
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,96,2,128,1,float16,fp8,0,0.41083359718322754
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,96,2,128,1,fp8,fp8,0,0.41162881851196287
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,96,4,128,1,fp8,fp8,0,0.46352639198303225
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,96,96,128,1,float16,float16,0,1.5252431869506835
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,96,8,128,1,float16,float16,0,0.5348591804504395
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,96,8,128,1,float16,fp8,0,0.4104640007019043
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,96,4,128,1,float16,float16,0,0.4831088066101074
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,96,4,128,1,float16,fp8,0,0.41048479080200195
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,96,96,128,1,fp8,fp8,0,0.279748797416687
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,96,8,128,1,fp8,fp8,0,0.41126079559326173
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,96,1,128,1,float16,fp8,0,0.21875040531158446
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,96,1,128,1,float16,float16,0,0.24494240283966065
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,96,96,128,1,float16,float16,0,0.7067967891693115
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,96,1,128,1,fp8,fp8,0,0.21827681064605714
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,96,96,128,1,float16,fp8,0,0.2747296094894409
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,96,2,128,1,float16,float16,0,0.25097920894622805
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,96,2,128,1,float16,fp8,0,0.2250368118286133
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,96,2,128,1,fp8,fp8,0,0.21860480308532715
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,96,4,128,1,float16,float16,0,0.2588399887084961
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,96,4,128,1,float16,fp8,0,0.2199023962020874
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,96,4,128,1,fp8,fp8,0,0.21824960708618163
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,96,8,128,1,float16,float16,0,0.2796112060546875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,96,1,128,1,float16,float16,0,0.14383840560913086
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,96,8,128,1,float16,fp8,0,0.21970078945159913
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,96,8,128,1,fp8,fp8,0,0.21863839626312256
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,96,96,128,1,float16,fp8,0,0.14790560007095338
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,96,96,128,1,float16,float16,0,0.3741919994354248
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,96,96,128,1,fp8,fp8,0,0.14839839935302734
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,96,1,128,1,float16,fp8,0,0.12152800559997559
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,96,4,128,1,float16,fp8,0,0.12228480577468873
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,96,1,128,1,fp8,fp8,0,0.12176159620285035
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,96,2,128,1,float16,float16,0,0.14326560497283936
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,96,2,128,1,float16,fp8,0,0.12322080135345459
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,96,2,128,1,fp8,fp8,0,0.12169120311737061
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,96,4,128,1,fp8,fp8,0,0.12194559574127198
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,96,4,128,1,float16,float16,0,0.15059679746627808
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,96,8,128,1,float16,float16,0,0.1590175986289978
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,96,8,128,1,float16,fp8,0,0.12256480455398559
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,96,8,128,1,fp8,fp8,0,0.12174240350723267
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,96,1,128,1,float16,fp8,0,3.898497772216797
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,96,1,128,1,fp8,fp8,0,3.902203369140625
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,96,1,128,1,float16,float16,0,4.59400634765625
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,96,2,128,1,float16,fp8,0,3.9041088104248045
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,96,2,128,1,fp8,fp8,0,3.8991168975830077
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,96,2,128,1,float16,float16,0,4.718332672119141
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,96,4,128,1,float16,fp8,0,3.89788818359375
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,96,4,128,1,float16,float16,0,4.921918487548828
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,96,4,128,1,fp8,fp8,0,3.895935821533203
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,96,96,128,1,fp8,fp8,0,2.5381824493408205
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,96,96,128,1,float16,fp8,0,2.7629072189331056
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,96,1,128,1,float16,float16,0,2.347248077392578
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,96,8,128,1,fp8,fp8,0,3.9076961517333983
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,96,8,128,1,float16,fp8,0,4.116977691650391
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,96,8,128,1,float16,float16,0,5.379953765869141
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,96,1,128,1,float16,fp8,0,1.9613840103149414
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,96,1,128,1,fp8,fp8,0,1.964227294921875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,96,2,128,1,float16,fp8,0,1.9630672454833984
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,96,2,128,1,fp8,fp8,0,1.9609151840209962
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,96,2,128,1,float16,float16,0,2.3233999252319335
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,96,4,128,1,float16,float16,0,2.429892730712891
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,96,96,128,1,float16,float16,0,7.644000244140625
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,96,4,128,1,float16,fp8,0,1.962366485595703
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,96,4,128,1,fp8,fp8,0,1.9635711669921876
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,96,8,128,1,float16,fp8,0,1.9600576400756835
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,96,8,128,1,float16,float16,0,2.6013551712036134
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,96,96,128,1,float16,fp8,0,1.2785264015197755
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,96,8,128,1,fp8,fp8,0,2.1130815505981446
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,96,96,128,1,fp8,fp8,0,1.2810223579406739
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,96,1,128,1,float16,float16,0,1.0784640312194824
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,96,1,128,1,float16,fp8,0,0.9908207893371582
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,96,1,128,1,fp8,fp8,0,1.110321617126465
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,96,2,128,1,float16,fp8,0,1.017912006378174
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,96,2,128,1,float16,float16,0,1.1166655540466308
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,96,2,128,1,fp8,fp8,0,0.9905584335327149
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,96,4,128,1,float16,float16,0,1.2244367599487305
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,96,4,128,1,float16,fp8,0,0.9912447929382324
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,96,96,128,1,float16,float16,0,3.9592975616455077
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,96,4,128,1,fp8,fp8,0,1.115006446838379
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,96,8,128,1,float16,fp8,0,1.0099727630615234
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,96,8,128,1,float16,float16,0,1.2921248435974122
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,96,96,128,1,float16,fp8,0,0.650923204421997
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,96,1,128,1,float16,float16,0,0.5590511798858643
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,96,1,128,1,float16,fp8,0,0.5079840183258056
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,96,8,128,1,fp8,fp8,0,0.991761589050293
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,96,96,128,1,fp8,fp8,0,0.6521135807037354
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,96,96,128,1,float16,float16,0,1.9472368240356446
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,96,1,128,1,fp8,fp8,0,0.5073679924011231
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,96,2,128,1,float16,float16,0,0.5701663970947266
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,96,2,128,1,float16,fp8,0,0.5067599773406982
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,96,2,128,1,fp8,fp8,0,0.5068895816802979
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,96,4,128,1,float16,float16,0,0.5981535911560059
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,96,4,128,1,float16,fp8,0,0.5071231842041015
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,96,4,128,1,fp8,fp8,0,0.5077824115753173
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,96,8,128,1,float16,float16,0,0.6558303833007812
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,96,8,128,1,float16,fp8,0,0.5064208030700683
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,96,8,128,1,fp8,fp8,0,0.5282832145690918
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,96,96,128,1,float16,fp8,0,0.3374560117721558
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,96,96,128,1,fp8,fp8,0,0.33804800510406496
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,96,1,128,1,float16,float16,0,0.29598240852355956
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,96,96,128,1,float16,float16,0,0.9838735580444335
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,96,1,128,1,float16,fp8,0,0.26590399742126464
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,96,4,128,1,float16,float16,0,0.31674559116363527
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,96,1,128,1,fp8,fp8,0,0.26507840156555174
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,96,4,128,1,fp8,fp8,0,0.26456000804901125
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,96,2,128,1,float16,fp8,0,0.266267204284668
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,96,2,128,1,fp8,fp8,0,0.2651648044586182
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,96,2,128,1,float16,float16,0,0.3020128011703491
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,96,4,128,1,float16,fp8,0,0.2649231910705566
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,96,8,128,1,float16,fp8,0,0.26621279716491697
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,96,8,128,1,float16,float16,0,0.34735679626464844
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,96,8,128,1,fp8,fp8,0,0.2648015975952148
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,96,96,128,1,float16,fp8,0,0.1811087965965271
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,96,96,128,1,float16,float16,0,0.510700798034668
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,96,96,128,1,fp8,fp8,0,0.18103840351104736
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,96,1,128,1,float16,float16,0,0.16660480499267577
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,96,1,128,1,float16,fp8,0,0.14393279552459717
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,96,1,128,1,fp8,fp8,0,0.14427839517593383
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,96,2,128,1,float16,float16,0,0.17522560358047484
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,96,2,128,1,float16,fp8,0,0.14395040273666382
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,96,2,128,1,fp8,fp8,0,0.14404959678649903
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,96,4,128,1,float16,float16,0,0.17830079793930054
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,96,4,128,1,float16,fp8,0,0.1445296049118042
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,96,4,128,1,fp8,fp8,0,0.14385119676589966
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,96,96,128,1,fp8,fp8,0,0.10221920013427735
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,96,8,128,1,float16,float16,0,0.19180320501327514
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,96,8,128,1,float16,fp8,0,0.14395999908447266
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,96,8,128,1,fp8,fp8,0,0.14585280418395996
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,96,96,128,1,float16,float16,0,0.27326719760894774
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,96,96,128,1,float16,fp8,0,0.10205279588699341
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,96,2,128,1,fp8,fp8,0,0.08389599919319153
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,96,1,128,1,float16,float16,0,0.09704639911651611
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,96,1,128,1,float16,fp8,0,0.08294079899787903
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,96,1,128,1,fp8,fp8,0,0.08293120265007019
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,96,8,128,1,float16,fp8,0,0.08344640135765076
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,96,2,128,1,float16,float16,0,0.09759200215339661
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,96,2,128,1,float16,fp8,0,0.08297439813613891
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,96,4,128,1,float16,float16,0,0.10379199981689453
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,96,4,128,1,float16,fp8,0,0.08282399773597718
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,96,4,128,1,fp8,fp8,0,0.08391519784927368
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,96,8,128,1,float16,float16,0,0.109552001953125
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,96,8,128,1,fp8,fp8,0,0.08310719728469848
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,96,1,128,1,float16,fp8,0,4.209372711181641
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,96,1,128,1,fp8,fp8,0,4.21815185546875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,96,1,128,1,float16,float16,0,4.933257675170898
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,96,2,128,1,float16,fp8,0,4.21136474609375
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,96,2,128,1,fp8,fp8,0,4.206620788574218
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,96,2,128,1,float16,float16,0,4.952951812744141
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,96,4,128,1,float16,fp8,0,4.208702468872071
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,96,4,128,1,float16,float16,0,5.332984161376953
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,96,4,128,1,fp8,fp8,0,4.208841705322266
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,96,96,128,1,float16,fp8,0,3.0917423248291014
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,96,8,128,1,fp8,fp8,0,4.208884811401367
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,96,8,128,1,float16,fp8,0,4.284494400024414
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,96,96,128,1,fp8,fp8,0,2.980340766906738
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,96,8,128,1,float16,float16,0,5.795644760131836
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,96,1,128,1,float16,float16,0,2.2999135971069338
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,96,1,128,1,float16,fp8,0,2.117585563659668
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,96,1,128,1,fp8,fp8,0,2.1166240692138674
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,96,2,128,1,float16,fp8,0,2.1141263961791994
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,96,2,128,1,float16,float16,0,2.3376127243041993
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,96,2,128,1,fp8,fp8,0,2.204987144470215
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,96,4,128,1,float16,fp8,0,2.116566467285156
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,96,4,128,1,fp8,fp8,0,2.1148319244384766
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,96,4,128,1,float16,float16,0,2.7545120239257814
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,96,8,128,1,float16,fp8,0,2.2359167098999024
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,96,96,128,1,float16,float16,0,9.584977722167968
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,96,8,128,1,float16,float16,0,2.8787792205810545
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,96,8,128,1,fp8,fp8,0,2.1162559509277346
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,96,1,128,1,float16,float16,0,1.190544033050537
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,96,96,128,1,float16,fp8,0,1.4494159698486329
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,96,1,128,1,float16,fp8,0,1.0865216255187988
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,96,96,128,1,fp8,fp8,0,1.4528143882751465
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,96,1,128,1,fp8,fp8,0,1.0660736083984375
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,96,2,128,1,float16,float16,0,1.231811237335205
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,96,2,128,1,float16,fp8,0,1.0680416107177735
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,96,2,128,1,fp8,fp8,0,1.1200016021728516
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,96,4,128,1,float16,fp8,0,1.0680447578430177
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,96,4,128,1,fp8,fp8,0,1.0659503936767578
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,96,4,128,1,float16,float16,0,1.2670767784118653
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,96,8,128,1,fp8,fp8,0,1.0678640365600587
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,96,8,128,1,float16,fp8,0,1.2412591934204102
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,96,8,128,1,float16,float16,0,1.4235119819641113
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,96,96,128,1,float16,fp8,0,0.7326096057891845
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,96,96,128,1,fp8,fp8,0,0.7361743927001954
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,96,1,128,1,float16,float16,0,0.6029391765594483
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,96,1,128,1,float16,fp8,0,0.5433663845062255
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,96,96,128,1,float16,float16,0,4.798904037475586
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,96,1,128,1,fp8,fp8,0,0.580950403213501
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,96,2,128,1,float16,float16,0,0.6267871856689453
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,96,2,128,1,fp8,fp8,0,0.5419087886810303
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,96,4,128,1,float16,fp8,0,0.5428256034851074
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,96,2,128,1,float16,fp8,0,0.5422880172729492
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,96,4,128,1,float16,float16,0,0.6472879886627197
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,96,96,128,1,float16,float16,0,2.4481632232666017
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,96,4,128,1,fp8,fp8,0,0.5428607940673829
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,96,8,128,1,float16,float16,0,0.7525328159332275
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,96,8,128,1,float16,fp8,0,0.552294397354126
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,96,1,128,1,float16,float16,0,0.3106112003326416
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,96,8,128,1,fp8,fp8,0,0.5425439834594726
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,96,96,128,1,float16,fp8,0,0.375598406791687
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,96,96,128,1,fp8,fp8,0,0.3791599988937378
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,96,1,128,1,float16,fp8,0,0.2809760093688965
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,96,1,128,1,fp8,fp8,0,0.2853951930999756
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,96,96,128,1,float16,float16,0,1.2261695861816406
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,96,2,128,1,float16,float16,0,0.322271990776062
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,96,2,128,1,float16,fp8,0,0.2809567928314209
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,96,2,128,1,fp8,fp8,0,0.28033759593963625
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,96,4,128,1,float16,float16,0,0.33880479335784913
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,96,4,128,1,float16,fp8,0,0.2805327892303467
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,96,4,128,1,fp8,fp8,0,0.2807775974273682
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,96,8,128,1,float16,float16,0,0.3795232057571411
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,96,8,128,1,float16,fp8,0,0.28136639595031737
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,96,8,128,1,fp8,fp8,0,0.2809760093688965
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,96,96,128,1,float16,fp8,0,0.19939839839935303
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,96,2,128,1,float16,float16,0,0.17912960052490234
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,96,96,128,1,fp8,fp8,0,0.19832160472869872
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,96,1,128,1,float16,float16,0,0.17177120447158814
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,96,1,128,1,float16,fp8,0,0.1501296043395996
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,96,1,128,1,fp8,fp8,0,0.15068479776382446
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,96,96,128,1,float16,float16,0,0.6309311866760254
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,96,2,128,1,float16,fp8,0,0.1506592035293579
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,96,2,128,1,fp8,fp8,0,0.15026400089263917
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,96,4,128,1,float16,float16,0,0.18813760280609132
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,96,4,128,1,fp8,fp8,0,0.15061119794845582
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,96,8,128,1,float16,float16,0,0.20719358921051026
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,96,4,128,1,float16,fp8,0,0.15074080228805542
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,96,8,128,1,float16,fp8,0,0.1505087971687317
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,96,8,128,1,fp8,fp8,0,0.15035040378570558
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,96,96,128,1,float16,fp8,0,0.11021759510040283
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,96,96,128,1,float16,float16,0,0.3309920072555542
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,96,96,128,1,fp8,fp8,0,0.10975840091705322
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,96,1,128,1,float16,float16,0,0.10368800163269043
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,96,4,128,1,float16,float16,0,0.11173919439315796
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,96,1,128,1,float16,fp8,0,0.08394880294799804
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,96,1,128,1,fp8,fp8,0,0.08370239734649658
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,96,2,128,1,float16,float16,0,0.10233919620513916
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,96,2,128,1,float16,fp8,0,0.08404960036277771
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,96,2,128,1,fp8,fp8,0,0.08410559892654419
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,96,4,128,1,float16,fp8,0,0.08366240262985229
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,96,4,128,1,fp8,fp8,0,0.08459680080413819
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,96,8,128,1,float16,float16,0,0.1211583971977234
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,96,8,128,1,float16,fp8,0,0.08402720093727112
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,96,8,128,1,fp8,fp8,0,0.08399519920349122
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,96,96,128,1,float16,float16,0,0.18119679689407348
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,96,96,128,1,float16,fp8,0,0.06336640119552613
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,96,96,128,1,fp8,fp8,0,0.06293439865112305
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,96,1,128,1,float16,float16,0,0.06386399865150452
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,96,1,128,1,float16,fp8,0,0.05143359899520874
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,96,1,128,1,fp8,fp8,0,0.05078719854354859
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,96,2,128,1,float16,float16,0,0.06420959830284119
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,96,2,128,1,float16,fp8,0,0.05082880258560181
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,96,2,128,1,fp8,fp8,0,0.051481598615646364
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,96,4,128,1,float16,float16,0,0.06416320204734802
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,96,4,128,1,float16,fp8,0,0.05060160160064697
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,96,8,128,1,float16,float16,0,0.06998239755630493
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,96,4,128,1,fp8,fp8,0,0.05082079768180847
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,96,8,128,1,float16,fp8,0,0.05087199807167053
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,96,8,128,1,fp8,fp8,0,0.05088000297546387
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,96,1,128,1,float16,fp8,0,3.2188209533691405
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,96,1,128,1,fp8,fp8,0,3.221491241455078
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,96,1,128,1,float16,float16,0,3.5766014099121093
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,96,2,128,1,fp8,fp8,0,3.21749267578125
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,96,2,128,1,float16,float16,0,3.660785675048828
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,96,2,128,1,float16,fp8,0,3.217407989501953
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,96,4,128,1,float16,float16,0,4.001604843139648
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,96,4,128,1,float16,fp8,0,3.211452865600586
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,96,4,128,1,fp8,fp8,0,3.2122528076171877
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,96,96,128,1,float16,fp8,0,2.37183837890625
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,96,8,128,1,float16,fp8,0,3.2122577667236327
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,96,8,128,1,fp8,fp8,0,3.291340637207031
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,96,8,128,1,float16,float16,0,4.622404861450195
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,96,1,128,1,float16,float16,0,1.7340112686157227
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,96,1,128,1,float16,fp8,0,1.616980743408203
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,96,96,128,1,fp8,fp8,0,2.3586496353149413
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,96,1,128,1,fp8,fp8,0,1.6192655563354492
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,96,2,128,1,float16,float16,0,1.8229631423950194
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,96,2,128,1,float16,fp8,0,1.615096092224121
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,96,2,128,1,fp8,fp8,0,1.6163936614990235
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,96,4,128,1,float16,float16,0,2.007281684875488
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,96,4,128,1,float16,fp8,0,1.6134143829345704
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,96,4,128,1,fp8,fp8,0,1.6142463684082031
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,96,8,128,1,float16,fp8,0,1.6653408050537108
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,96,8,128,1,float16,float16,0,2.294207954406738
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,96,8,128,1,fp8,fp8,0,1.6131471633911132
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,96,1,128,1,float16,float16,0,0.8816335678100586
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,96,96,128,1,float16,fp8,0,1.1937151908874513
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,96,96,128,1,fp8,fp8,0,1.1857855796813965
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,96,1,128,1,float16,fp8,0,0.8211968421936036
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,96,96,128,1,float16,float16,0,8.967407989501954
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,96,1,128,1,fp8,fp8,0,0.8164560317993164
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,96,2,128,1,float16,float16,0,0.9194160461425781
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,96,2,128,1,float16,fp8,0,0.8153519630432129
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,96,2,128,1,fp8,fp8,0,0.8148943901062011
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,96,4,128,1,float16,fp8,0,0.8165743827819825
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,96,4,128,1,float16,float16,0,0.9982239723205566
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,96,4,128,1,fp8,fp8,0,0.8159600257873535
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,96,8,128,1,float16,fp8,0,0.8144960403442383
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,96,8,128,1,fp8,fp8,0,0.8146176338195801
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,96,96,128,1,float16,fp8,0,0.6023471832275391
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,96,8,128,1,float16,float16,0,1.1600655555725097
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,96,1,128,1,float16,float16,0,0.45511679649353026
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,96,96,128,1,fp8,fp8,0,0.6011184215545654
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,96,1,128,1,float16,fp8,0,0.41550397872924805
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,96,96,128,1,float16,float16,0,4.506889724731446
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,96,1,128,1,fp8,fp8,0,0.41559839248657227
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,96,2,128,1,float16,float16,0,0.47542881965637207
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,96,2,128,1,float16,fp8,0,0.41550722122192385
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,96,2,128,1,fp8,fp8,0,0.4159503936767578
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,96,4,128,1,float16,float16,0,0.5131072044372559
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,96,4,128,1,float16,fp8,0,0.41556639671325685
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,96,96,128,1,float16,float16,0,2.2695295333862306
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,96,4,128,1,fp8,fp8,0,0.4146431922912598
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,96,8,128,1,float16,float16,0,0.592571210861206
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,96,8,128,1,float16,fp8,0,0.4149328231811523
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,96,8,128,1,fp8,fp8,0,0.4156015872955322
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,96,96,128,1,float16,fp8,0,0.3093456029891968
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,96,96,128,1,fp8,fp8,0,0.309116792678833
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,96,1,128,1,float16,float16,0,0.2446768045425415
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,96,1,128,1,float16,fp8,0,0.21586239337921143
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,96,1,128,1,fp8,fp8,0,0.21559679508209229
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,96,96,128,1,float16,float16,0,1.1514592170715332
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,96,2,128,1,float16,float16,0,0.25403680801391604
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,96,2,128,1,float16,fp8,0,0.21579198837280272
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,96,2,128,1,fp8,fp8,0,0.21565918922424315
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,96,4,128,1,float16,float16,0,0.2720335960388184
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,96,4,128,1,float16,fp8,0,0.21532158851623534
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,96,4,128,1,fp8,fp8,0,0.21554079055786132
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,96,8,128,1,float16,float16,0,0.3111135959625244
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,96,8,128,1,float16,fp8,0,0.21596639156341552
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,96,8,128,1,fp8,fp8,0,0.21604158878326415
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,96,96,128,1,float16,fp8,0,0.16276960372924804
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,96,96,128,1,fp8,fp8,0,0.16265759468078614
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,96,1,128,1,float16,float16,0,0.13667999505996703
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,96,96,128,1,float16,float16,0,0.5906960010528565
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,96,1,128,1,float16,fp8,0,0.11590240001678467
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,96,2,128,1,float16,float16,0,0.1439087986946106
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,96,2,128,1,float16,fp8,0,0.11605919599533081
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,96,1,128,1,fp8,fp8,0,0.11578400135040283
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,96,4,128,1,float16,float16,0,0.15106240510940552
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,96,2,128,1,fp8,fp8,0,0.11624480485916137
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,96,4,128,1,float16,fp8,0,0.11672639846801758
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,96,4,128,1,fp8,fp8,0,0.115830397605896
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,96,8,128,1,float16,float16,0,0.1711392045021057
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,96,8,128,1,float16,fp8,0,0.11684800386428833
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,96,8,128,1,fp8,fp8,0,0.11643199920654297
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,96,96,128,1,float16,float16,0,0.31075520515441896
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,96,96,128,1,float16,fp8,0,0.09040319919586182
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,96,96,128,1,fp8,fp8,0,0.0902239978313446
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,96,1,128,1,float16,float16,0,0.0845296025276184
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,96,1,128,1,float16,fp8,0,0.06517760157585144
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,96,1,128,1,fp8,fp8,0,0.0653711974620819
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,96,2,128,1,float16,float16,0,0.0844543993473053
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,96,2,128,1,float16,fp8,0,0.065583997964859
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,96,2,128,1,fp8,fp8,0,0.06553599834442139
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,96,4,128,1,float16,float16,0,0.08881279826164246
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,96,4,128,1,float16,fp8,0,0.06503360271453858
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,96,4,128,1,fp8,fp8,0,0.06517919898033142
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,96,8,128,1,float16,float16,0,0.09943360090255737
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,96,8,128,1,float16,fp8,0,0.06569759845733643
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,96,8,128,1,fp8,fp8,0,0.06562719941139221
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,96,96,128,1,float16,float16,0,0.16824159622192383
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,96,96,128,1,float16,fp8,0,0.05143200159072876
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,96,96,128,1,fp8,fp8,0,0.05132799744606018
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,96,1,128,1,float16,float16,0,0.05151360034942627
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,96,1,128,1,float16,fp8,0,0.03919680118560791
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,96,1,128,1,fp8,fp8,0,0.039129599928855896
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,96,2,128,1,float16,float16,0,0.05143839716911316
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,96,2,128,1,float16,fp8,0,0.039139199256896975
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,96,2,128,1,fp8,fp8,0,0.03928639888763428
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,96,4,128,1,float16,float16,0,0.051883202791213986
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,96,4,128,1,float16,fp8,0,0.03918080031871796
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,96,4,128,1,fp8,fp8,0,0.03912160098552704
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,96,8,128,1,float16,float16,0,0.0564079999923706
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,96,96,128,1,float16,float16,0,0.0804095983505249
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,96,8,128,1,float16,fp8,0,0.0391184002161026
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,96,8,128,1,fp8,fp8,0,0.03916319906711578
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,96,96,128,1,float16,fp8,0,0.0330159991979599
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,96,96,128,1,fp8,fp8,0,0.03303360044956207
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,96,1,128,1,float16,float16,0,0.03923520147800445
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,96,1,128,1,float16,fp8,0,0.026787200570106508
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,96,1,128,1,fp8,fp8,0,0.026998400688171387
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,96,2,128,1,float16,float16,0,0.039164799451828006
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,96,2,128,1,float16,fp8,0,0.026862400770187377
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,96,2,128,1,fp8,fp8,0,0.0267984002828598
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,96,4,128,1,float16,float16,0,0.03931840062141419
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,96,4,128,1,float16,fp8,0,0.02701759934425354
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,96,4,128,1,fp8,fp8,0,0.02685759961605072
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,96,8,128,1,float16,float16,0,0.03945440053939819
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,96,8,128,1,float16,fp8,0,0.027001601457595826
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,96,8,128,1,fp8,fp8,0,0.026931199431419372
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,96,1,128,1,float16,float16,0,1.436075210571289
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,96,1,128,1,float16,fp8,0,1.3388992309570313
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,96,1,128,1,fp8,fp8,0,1.3383567810058594
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,96,2,128,1,float16,float16,0,1.5161871910095215
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,96,2,128,1,float16,fp8,0,1.3355456352233888
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,96,2,128,1,fp8,fp8,0,1.3350655555725097
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,96,4,128,1,float16,fp8,0,1.3343680381774903
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,96,4,128,1,float16,float16,0,1.6721935272216797
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,96,4,128,1,fp8,fp8,0,1.334441566467285
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,96,8,128,1,float16,fp8,0,1.3329008102416993
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,96,1,128,1,float16,float16,0,0.7318111896514893
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,96,96,128,1,float16,fp8,0,1.0430031776428224
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,96,8,128,1,fp8,fp8,0,1.3339200019836426
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,96,96,128,1,fp8,fp8,0,1.0421263694763183
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,96,8,128,1,float16,float16,0,1.9927839279174804
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,96,1,128,1,float16,fp8,0,0.6765088081359864
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,96,1,128,1,fp8,fp8,0,0.6756224155426025
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,96,2,128,1,float16,float16,0,0.7696688175201416
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,96,2,128,1,float16,fp8,0,0.6744912147521973
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,96,2,128,1,fp8,fp8,0,0.675812816619873
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,96,4,128,1,float16,fp8,0,0.6741343975067139
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,96,4,128,1,float16,float16,0,0.8469136238098145
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,96,4,128,1,fp8,fp8,0,0.6748816013336182
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,96,8,128,1,float16,fp8,0,0.6727200031280518
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,96,8,128,1,fp8,fp8,0,0.6732768058776856
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,96,8,128,1,float16,float16,0,1.0070256233215331
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,96,96,128,1,float16,fp8,0,0.5297488212585449
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,96,1,128,1,float16,float16,0,0.37891199588775637
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,96,1,128,1,float16,fp8,0,0.34281919002532957
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,96,96,128,1,fp8,fp8,0,0.5285071849822998
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,96,1,128,1,fp8,fp8,0,0.34276959896087644
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,96,2,128,1,float16,float16,0,0.3974800109863281
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,96,2,128,1,float16,fp8,0,0.34285759925842285
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,96,2,128,1,fp8,fp8,0,0.3430527925491333
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,96,96,128,1,float16,float16,0,4.338843154907226
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,96,4,128,1,float16,float16,0,0.43563199043273926
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,96,4,128,1,float16,fp8,0,0.3421056032180786
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,96,4,128,1,fp8,fp8,0,0.3423583984375
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,96,96,128,1,float16,float16,0,2.1860944747924806
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,96,8,128,1,float16,float16,0,0.5140719890594483
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,96,8,128,1,float16,fp8,0,0.34299359321594236
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,96,8,128,1,fp8,fp8,0,0.34244959354400634
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,96,96,128,1,float16,fp8,0,0.2704511880874634
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,96,96,128,1,fp8,fp8,0,0.2692751884460449
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,96,1,128,1,float16,float16,0,0.20453760623931885
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,96,1,128,1,float16,fp8,0,0.1774799942970276
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,96,1,128,1,fp8,fp8,0,0.17663520574569702
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,96,96,128,1,float16,float16,0,1.1065775871276855
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,96,2,128,1,float16,float16,0,0.2119983911514282
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,96,2,128,1,float16,fp8,0,0.1766592025756836
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,96,2,128,1,fp8,fp8,0,0.1767680048942566
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,96,4,128,1,float16,float16,0,0.2309743881225586
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,96,4,128,1,float16,fp8,0,0.1770751953125
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,96,4,128,1,fp8,fp8,0,0.1770527958869934
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,96,96,128,1,fp8,fp8,0,0.143721604347229
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,96,8,128,1,float16,float16,0,0.26974079608917234
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,96,8,128,1,float16,fp8,0,0.17711199522018434
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,96,8,128,1,fp8,fp8,0,0.1772063970565796
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,96,96,128,1,float16,float16,0,0.5682015895843506
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,96,1,128,1,float16,float16,0,0.11665600538253784
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,96,96,128,1,float16,fp8,0,0.14359840154647827
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,96,1,128,1,float16,fp8,0,0.09648640155792236
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,96,1,128,1,fp8,fp8,0,0.09666399955749512
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,96,2,128,1,float16,float16,0,0.12186720371246337
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,96,2,128,1,float16,fp8,0,0.09663839936256409
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,96,2,128,1,fp8,fp8,0,0.0966048002243042
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,96,4,128,1,float16,float16,0,0.1317199945449829
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,96,4,128,1,float16,fp8,0,0.0966816008090973
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,96,4,128,1,fp8,fp8,0,0.09660159945487976
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,96,8,128,1,float16,float16,0,0.15136480331420898
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,96,8,128,1,float16,fp8,0,0.0966159999370575
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,96,8,128,1,fp8,fp8,0,0.09685760140419006
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,96,1,128,1,float16,fp8,0,0.05551040172576904
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,96,96,128,1,float16,fp8,0,0.08013920187950134
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,96,96,128,1,float16,float16,0,0.3002863883972168
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,96,96,128,1,fp8,fp8,0,0.08008639812469483
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,96,1,128,1,float16,float16,0,0.07356160283088684
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,96,1,128,1,fp8,fp8,0,0.05556960105895996
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,96,2,128,1,float16,float16,0,0.07226880192756653
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,96,2,128,1,float16,fp8,0,0.05559520125389099
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,96,2,128,1,fp8,fp8,0,0.05547680258750916
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,96,4,128,1,float16,float16,0,0.08096960186958313
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,96,4,128,1,float16,fp8,0,0.055553597211837766
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,96,4,128,1,fp8,fp8,0,0.055606400966644286
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,96,8,128,1,float16,float16,0,0.08824639916419982
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,96,8,128,1,float16,fp8,0,0.055369597673416135
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,96,8,128,1,fp8,fp8,0,0.05560160279273987
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,96,96,128,1,float16,float16,0,0.16004639863967896
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,96,96,128,1,float16,fp8,0,0.04329760074615478
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,96,96,128,1,fp8,fp8,0,0.043838399648666385
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,96,1,128,1,float16,float16,0,0.04377120137214661
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,96,1,128,1,float16,fp8,0,0.03289920091629028
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,96,1,128,1,fp8,fp8,0,0.03162240087985992
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,96,2,128,1,float16,float16,0,0.04405759871006012
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,96,2,128,1,float16,fp8,0,0.032528001070022586
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,96,2,128,1,fp8,fp8,0,0.03281440138816834
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,96,4,128,1,float16,float16,0,0.0451664000749588
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,96,4,128,1,float16,fp8,0,0.03245440125465393
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,96,4,128,1,fp8,fp8,0,0.031699201464653014
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,96,8,128,1,float16,float16,0,0.04969759881496429
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,96,1,128,1,float16,fp8,0,0.022771200537681578
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,96,8,128,1,float16,fp8,0,0.03272480070590973
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,96,2,128,1,float16,float16,0,0.035041600465774536
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,96,8,128,1,fp8,fp8,0,0.03181599974632263
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,96,96,128,1,float16,float16,0,0.07602239847183227
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,96,4,128,1,float16,float16,0,0.03515360057353974
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,96,96,128,1,float16,fp8,0,0.029073598980903625
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,96,96,128,1,fp8,fp8,0,0.028974398970603943
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,96,1,128,1,float16,float16,0,0.03495680093765259
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,96,1,128,1,fp8,fp8,0,0.02277279943227768
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,96,2,128,1,float16,fp8,0,0.02290560007095337
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,96,2,128,1,fp8,fp8,0,0.022756800055503845
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,96,4,128,1,float16,fp8,0,0.022710399329662324
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,96,4,128,1,fp8,fp8,0,0.022873599827289582
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,96,8,128,1,float16,float16,0,0.03508000075817108
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,96,8,128,1,float16,fp8,0,0.022945599257946016
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,96,8,128,1,fp8,fp8,0,0.022731199860572815
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,96,96,128,1,float16,float16,0,0.043665599822998044
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,96,96,128,1,float16,fp8,0,0.018596799671649934
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,96,2,128,1,fp8,fp8,0,0.014585599303245544
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,96,1,128,1,float16,float16,0,0.02703840136528015
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,96,1,128,1,float16,fp8,0,0.014923200011253357
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,96,96,128,1,fp8,fp8,0,0.018787199258804323
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,96,8,128,1,float16,float16,0,0.026840001344680786
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,96,1,128,1,fp8,fp8,0,0.015209600329399109
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,96,2,128,1,float16,float16,0,0.026923200488090514
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,96,2,128,1,float16,fp8,0,0.015171200037002563
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,96,4,128,1,float16,float16,0,0.02494879961013794
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,96,4,128,1,float16,fp8,0,0.014740799367427827
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,96,4,128,1,fp8,fp8,0,0.016518400609493257
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,96,8,128,1,fp8,fp8,0,0.016599999368190767
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,96,8,128,1,float16,fp8,0,0.016492800414562227
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,96,1,128,1,float16,float16,0,0.8671168327331543
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,96,1,128,1,float16,fp8,0,0.8032943725585937
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,96,1,128,1,fp8,fp8,0,0.8023103713989258
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,96,2,128,1,float16,fp8,0,0.8019120216369628
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,96,2,128,1,fp8,fp8,0,0.802126407623291
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,96,2,128,1,float16,float16,0,0.9056927680969238
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,96,4,128,1,float16,fp8,0,0.8008912086486817
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,96,4,128,1,float16,float16,0,0.9834799766540527
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,96,4,128,1,fp8,fp8,0,0.7995039939880371
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,96,8,128,1,float16,fp8,0,0.7987631797790528
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,96,8,128,1,float16,float16,0,1.1426159858703613
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,96,8,128,1,fp8,fp8,0,0.7991168022155761
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,96,96,128,1,float16,fp8,0,0.5919968128204346
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,96,1,128,1,float16,float16,0,0.4460031986236572
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,96,1,128,1,float16,fp8,0,0.4073728084564209
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,96,1,128,1,fp8,fp8,0,0.40728321075439455
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,96,2,128,1,float16,float16,0,0.46614880561828614
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,96,96,128,1,fp8,fp8,0,0.5919007778167724
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,96,2,128,1,float16,fp8,0,0.4068175792694092
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,96,2,128,1,fp8,fp8,0,0.4075039863586426
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,96,4,128,1,float16,float16,0,0.5041024208068847
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,96,4,128,1,float16,fp8,0,0.40625438690185545
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,96,4,128,1,fp8,fp8,0,0.40572319030761717
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,96,8,128,1,float16,float16,0,0.5811423778533935
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,96,8,128,1,float16,fp8,0,0.4060224056243896
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,96,96,128,1,float16,float16,0,2.2483312606811525
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,96,8,128,1,fp8,fp8,0,0.40579838752746583
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,96,96,128,1,float16,fp8,0,0.3018032073974609
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,96,96,128,1,fp8,fp8,0,0.3014847993850708
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,96,1,128,1,float16,float16,0,0.23720641136169435
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,96,1,128,1,float16,fp8,0,0.20931038856506348
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,96,1,128,1,fp8,fp8,0,0.20930559635162355
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,96,96,128,1,float16,float16,0,1.1371536254882812
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,96,2,128,1,float16,float16,0,0.24632959365844725
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,96,2,128,1,float16,fp8,0,0.20884959697723388
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,96,2,128,1,fp8,fp8,0,0.20895519256591796
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,96,4,128,1,float16,float16,0,0.2650480031967163
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,96,8,128,1,fp8,fp8,0,0.20895678997039796
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,96,4,128,1,float16,fp8,0,0.20919039249420165
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,96,96,128,1,float16,fp8,0,0.15585119724273683
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,96,4,128,1,fp8,fp8,0,0.20931680202484132
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,96,8,128,1,float16,float16,0,0.3037856101989746
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,96,8,128,1,float16,fp8,0,0.20869600772857666
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,96,96,128,1,fp8,fp8,0,0.15614720582962036
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,96,96,128,1,float16,float16,0,0.5812448024749756
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,96,1,128,1,float16,float16,0,0.1314784049987793
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,96,1,128,1,fp8,fp8,0,0.10913280248641968
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,96,4,128,1,float16,float16,0,0.1461680054664612
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,96,2,128,1,float16,float16,0,0.1377519965171814
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,96,4,128,1,fp8,fp8,0,0.10932960510253906
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,96,1,128,1,float16,fp8,0,0.1094048023223877
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,96,2,128,1,float16,fp8,0,0.10960960388183594
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,96,2,128,1,fp8,fp8,0,0.10896159410476684
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,96,4,128,1,float16,fp8,0,0.10974559783935547
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,96,96,128,1,fp8,fp8,0,0.08421120047569275
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,96,8,128,1,float16,fp8,0,0.10970239639282227
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,96,1,128,1,float16,fp8,0,0.05975679755210876
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,96,8,128,1,float16,float16,0,0.1658128023147583
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,96,8,128,1,fp8,fp8,0,0.10930880308151245
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,96,96,128,1,float16,fp8,0,0.08479679822921753
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,96,96,128,1,float16,float16,0,0.3036511898040771
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,96,1,128,1,float16,float16,0,0.07971680164337158
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,96,1,128,1,fp8,fp8,0,0.059622400999069215
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,96,4,128,1,fp8,fp8,0,0.05969759821891785
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,96,2,128,1,float16,float16,0,0.07952960133552552
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,96,2,128,1,float16,fp8,0,0.05971840023994446
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,96,2,128,1,fp8,fp8,0,0.05981760025024414
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,96,4,128,1,float16,float16,0,0.08639680147171021
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,96,4,128,1,float16,fp8,0,0.05981760025024414
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,96,8,128,1,float16,float16,0,0.0949184000492096
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,96,8,128,1,float16,fp8,0,0.0597760021686554
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,96,8,128,1,fp8,fp8,0,0.05971519947052002
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,96,96,128,1,float16,float16,0,0.16414239406585693
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,96,96,128,1,float16,fp8,0,0.04735519886016846
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,96,96,128,1,fp8,fp8,0,0.04734880030155182
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,96,1,128,1,float16,float16,0,0.04738560020923614
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,96,1,128,1,float16,fp8,0,0.03529120087623596
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,96,1,128,1,fp8,fp8,0,0.035097599029541016
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,96,2,128,1,float16,float16,0,0.04740320146083832
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,96,2,128,1,float16,fp8,0,0.035036799311637876
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,96,2,128,1,fp8,fp8,0,0.03521760106086731
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,96,4,128,1,float16,float16,0,0.04920479953289032
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,96,4,128,1,float16,fp8,0,0.03511199951171875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,96,4,128,1,fp8,fp8,0,0.035183998942375186
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,96,8,128,1,float16,float16,0,0.053668802976608275
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,96,8,128,1,float16,fp8,0,0.03523840010166168
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,96,8,128,1,fp8,fp8,0,0.035227200388908385
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,96,96,128,1,float16,float16,0,0.07597919702529907
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,96,96,128,1,float16,fp8,0,0.026932799816131593
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,96,96,128,1,fp8,fp8,0,0.02699199914932251
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,96,1,128,1,float16,float16,0,0.032971200346946714
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,96,1,128,1,float16,fp8,0,0.021766400337219237
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,96,1,128,1,fp8,fp8,0,0.02244960069656372
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,96,2,128,1,float16,float16,0,0.03451519906520843
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,96,2,128,1,float16,fp8,0,0.02125120013952255
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,96,2,128,1,fp8,fp8,0,0.021425600349903106
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,96,4,128,1,float16,float16,0,0.03497599959373474
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,96,4,128,1,float16,fp8,0,0.021830399334430695
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,96,4,128,1,fp8,fp8,0,0.02261119931936264
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,96,8,128,1,float16,float16,0,0.035020801424980166
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,96,8,128,1,float16,fp8,0,0.021670399606227873
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,96,8,128,1,fp8,fp8,0,0.022204799950122832
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,96,96,128,1,float16,float16,0,0.04556959867477417
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,96,96,128,1,float16,fp8,0,0.019211199879646302
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,96,96,128,1,fp8,fp8,0,0.018854400515556334
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,96,1,128,1,float16,float16,0,0.02680639922618866
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,96,1,128,1,float16,fp8,0,0.01655520051717758
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,96,1,128,1,fp8,fp8,0,0.016652800142765045
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,96,2,128,1,float16,float16,0,0.026846399903297423
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,96,2,128,1,float16,fp8,0,0.01674239933490753
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,96,2,128,1,fp8,fp8,0,0.016638399660587312
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,96,4,128,1,float16,float16,0,0.02686559855937958
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,96,4,128,1,float16,fp8,0,0.016553600132465363
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,96,4,128,1,fp8,fp8,0,0.016631999611854555
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,96,8,128,1,float16,float16,0,0.026952001452445983
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,96,8,128,1,float16,fp8,0,0.0165583997964859
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,96,8,128,1,fp8,fp8,0,0.016523200273513793
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,96,96,128,1,float16,float16,0,0.029967999458312987
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,96,96,128,1,float16,fp8,0,0.01266240030527115
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,96,1,128,1,float16,float16,0,0.020846399664878845
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,96,1,128,1,float16,fp8,0,0.01170559972524643
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,96,96,128,1,fp8,fp8,0,0.012649600207805634
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,96,4,128,1,fp8,fp8,0,0.01188800036907196
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,96,1,128,1,fp8,fp8,0,0.010492800176143647
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,96,2,128,1,float16,float16,0,0.020772799849510193
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,96,2,128,1,float16,fp8,0,0.010507199913263321
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,96,2,128,1,fp8,fp8,0,0.011443199962377549
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,96,4,128,1,float16,float16,0,0.020657600462436677
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,96,4,128,1,float16,fp8,0,0.011572799831628799
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,96,8,128,1,float16,float16,0,0.020822399854660036
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,96,8,128,1,float16,fp8,0,0.012167999893426895
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,96,8,128,1,fp8,fp8,0,0.010499200224876404
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,96,1,128,1,float16,float16,0,0.6705183982849121
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,96,1,128,1,float16,fp8,0,0.6104623794555664
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,96,1,128,1,fp8,fp8,0,0.6108176231384277
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,96,2,128,1,float16,fp8,0,0.6110223770141602
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,96,2,128,1,float16,float16,0,0.6888127803802491
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,96,2,128,1,fp8,fp8,0,0.6104015827178955
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,96,4,128,1,float16,float16,0,0.7281663894653321
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,96,4,128,1,float16,fp8,0,0.6110367774963379
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,96,4,128,1,fp8,fp8,0,0.6098112106323242
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,96,8,128,1,float16,fp8,0,0.6099008083343506
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,96,8,128,1,float16,float16,0,0.8071200370788574
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,96,8,128,1,fp8,fp8,0,0.6096591949462891
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,96,96,128,1,float16,fp8,0,0.403707218170166
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,96,96,128,1,fp8,fp8,0,0.403707218170166
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,96,1,128,1,float16,float16,0,0.34897279739379883
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,96,1,128,1,float16,fp8,0,0.31087679862976075
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,96,1,128,1,fp8,fp8,0,0.3108367919921875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,96,96,128,1,float16,float16,0,1.2522560119628907
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,96,4,128,1,float16,float16,0,0.3761631965637207
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,96,2,128,1,float16,float16,0,0.3565360069274902
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,96,2,128,1,float16,fp8,0,0.3108464002609253
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,96,2,128,1,fp8,fp8,0,0.3111567974090576
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,96,8,128,1,float16,float16,0,0.4153776168823242
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,96,4,128,1,float16,fp8,0,0.3103487968444824
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,96,4,128,1,fp8,fp8,0,0.30976641178131104
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,96,8,128,1,float16,fp8,0,0.3100032091140747
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,96,8,128,1,fp8,fp8,0,0.3102560043334961
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,96,96,128,1,float16,fp8,0,0.20757761001586914
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,96,96,128,1,fp8,fp8,0,0.20779039859771728
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,96,1,128,1,float16,float16,0,0.18605600595474242
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,96,96,128,1,float16,float16,0,0.6390367984771729
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,96,1,128,1,float16,fp8,0,0.16046559810638428
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,96,1,128,1,fp8,fp8,0,0.16040159463882447
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,96,2,128,1,float16,float16,0,0.19263839721679688
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,96,2,128,1,float16,fp8,0,0.160588800907135
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,96,2,128,1,fp8,fp8,0,0.1605712056159973
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,96,4,128,1,float16,float16,0,0.20051519870758056
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,96,4,128,1,float16,fp8,0,0.16178560256958008
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,96,4,128,1,fp8,fp8,0,0.16053919792175292
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,96,8,128,1,float16,float16,0,0.2205280065536499
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,96,8,128,1,float16,fp8,0,0.16034560203552245
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,96,8,128,1,fp8,fp8,0,0.16057440042495727
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,96,96,128,1,float16,fp8,0,0.10947680473327637
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,96,96,128,1,float16,float16,0,0.33122079372406005
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,96,96,128,1,fp8,fp8,0,0.10923199653625489
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,96,1,128,1,float16,float16,0,0.10888479948043824
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,96,1,128,1,float16,fp8,0,0.08515040278434753
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,96,1,128,1,fp8,fp8,0,0.08506399989128113
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,96,2,128,1,float16,float16,0,0.10801600217819214
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,96,2,128,1,float16,fp8,0,0.08478720188140869
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,96,2,128,1,fp8,fp8,0,0.08481919765472412
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,96,4,128,1,float16,float16,0,0.11458879709243774
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,96,4,128,1,float16,fp8,0,0.08484320044517517
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,96,4,128,1,fp8,fp8,0,0.08619040250778198
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,96,8,128,1,float16,float16,0,0.12285120487213134
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,96,8,128,1,float16,fp8,0,0.08535360097885132
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,96,8,128,1,fp8,fp8,0,0.08505600094795226
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,96,96,128,1,float16,float16,0,0.17674880027770995
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,96,96,128,1,float16,fp8,0,0.05958719849586487
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,96,96,128,1,fp8,fp8,0,0.059571200609207155
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,96,1,128,1,float16,float16,0,0.061689597368240354
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,96,1,128,1,float16,fp8,0,0.04737919867038727
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,96,1,128,1,fp8,fp8,0,0.04739519953727722
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,96,2,128,1,float16,float16,0,0.06186079978942871
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,96,2,128,1,float16,fp8,0,0.047440001368522645
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,96,2,128,1,fp8,fp8,0,0.04736480116844177
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,96,4,128,1,float16,float16,0,0.06192799806594849
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,96,4,128,1,float16,fp8,0,0.04740320146083832
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,96,4,128,1,fp8,fp8,0,0.047367998957633974
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,96,8,128,1,float16,float16,0,0.06815680265426635
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,96,8,128,1,float16,fp8,0,0.0473471999168396
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,96,8,128,1,fp8,fp8,0,0.04732640087604523
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,96,96,128,1,float16,float16,0,0.08375679850578308
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,96,96,128,1,float16,fp8,0,0.03502399921417236
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,96,96,128,1,fp8,fp8,0,0.03478879928588867
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,96,1,128,1,float16,float16,0,0.04113599956035614
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,96,1,128,1,float16,fp8,0,0.028836798667907716
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,96,1,128,1,fp8,fp8,0,0.028838399052619933
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,96,2,128,1,float16,float16,0,0.041315200924873355
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,96,2,128,1,float16,fp8,0,0.02914080023765564
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,96,2,128,1,fp8,fp8,0,0.028814399242401124
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,96,4,128,1,float16,float16,0,0.041833600401878356
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,96,4,128,1,float16,fp8,0,0.02884480059146881
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,96,4,128,1,fp8,fp8,0,0.028830400109291075
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,96,8,128,1,float16,float16,0,0.04259839951992035
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,96,8,128,1,float16,fp8,0,0.028948798775672913
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,96,8,128,1,fp8,fp8,0,0.028884801268577575
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,96,96,128,1,float16,float16,0,0.047512000799179076
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,96,96,128,1,float16,fp8,0,0.021129600703716278
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,96,96,128,1,fp8,fp8,0,0.020790399610996248
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,96,1,128,1,float16,float16,0,0.02887200117111206
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,96,1,128,1,float16,fp8,0,0.01860159933567047
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,96,1,128,1,fp8,fp8,0,0.01854719966650009
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,96,2,128,1,float16,float16,0,0.02914080023765564
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,96,2,128,1,float16,fp8,0,0.018654400110244752
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,96,2,128,1,fp8,fp8,0,0.018724800646305086
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,96,4,128,1,float16,float16,0,0.02922079861164093
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,96,4,128,1,float16,fp8,0,0.01858399957418442
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,96,4,128,1,fp8,fp8,0,0.01866080015897751
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,96,96,128,1,fp8,fp8,0,0.014958399534225463
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,96,8,128,1,float16,float16,0,0.029123198986053467
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,96,8,128,1,float16,fp8,0,0.018649600446224213
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,96,8,128,1,fp8,fp8,0,0.018745599687099455
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,96,96,128,1,float16,float16,0,0.03295199871063233
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,96,96,128,1,float16,fp8,0,0.016548800468444824
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,96,1,128,1,float16,float16,0,0.024718399345874786
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,96,2,128,1,float16,float16,0,0.023603199422359465
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,96,1,128,1,float16,fp8,0,0.014483200013637542
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,96,1,128,1,fp8,fp8,0,0.0144896000623703
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,96,2,128,1,float16,fp8,0,0.014684799313545226
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,96,2,128,1,fp8,fp8,0,0.014545600116252898
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,96,4,128,1,float16,float16,0,0.024622400104999543
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,96,4,128,1,float16,fp8,0,0.014483200013637542
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,96,4,128,1,fp8,fp8,0,0.014502400159835815
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,96,8,128,1,float16,float16,0,0.024753600358963013
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,96,8,128,1,fp8,fp8,0,0.014697599411010741
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,96,96,128,1,float16,float16,0,0.022921599447727203
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,96,1,128,1,fp8,fp8,0,0.010462400317192078
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,96,8,128,1,float16,fp8,0,0.01462399959564209
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,96,96,128,1,float16,fp8,0,0.010515200346708298
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,96,96,128,1,fp8,fp8,0,0.010478399693965912
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,96,1,128,1,float16,float16,0,0.02048799991607666
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,96,1,128,1,float16,fp8,0,0.010398399829864503
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,96,2,128,1,float16,float16,0,0.020635199546813966
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,96,2,128,1,float16,fp8,0,0.010608000308275222
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,96,4,128,1,float16,float16,0,0.018615999817848207
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,96,4,128,1,float16,fp8,0,0.010393600165843963
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,96,2,128,1,fp8,fp8,0,0.010539200156927109
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,96,4,128,1,fp8,fp8,0,0.010574399679899215
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,96,8,128,1,float16,float16,0,0.020660799741744996
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,96,8,128,1,float16,fp8,0,0.010497599840164185
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,96,8,128,1,fp8,fp8,0,0.010494399815797806
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,96,1,128,1,float16,float16,0,0.6014944076538086
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,96,1,128,1,fp8,fp8,0,0.5400752067565918
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,96,1,128,1,float16,fp8,0,0.5392735958099365
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,96,2,128,1,float16,float16,0,0.6008128166198731
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,96,2,128,1,float16,fp8,0,0.5400464057922363
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,96,2,128,1,fp8,fp8,0,0.5390768051147461
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,96,4,128,1,float16,float16,0,0.6303184032440186
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,96,4,128,1,float16,fp8,0,0.5379312038421631
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,96,4,128,1,fp8,fp8,0,0.5388783931732177
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,96,96,128,1,float16,fp8,0,0.32012479305267333
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,96,8,128,1,float16,float16,0,0.6649775981903077
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,96,96,128,1,float16,float16,0,0.7650688171386719
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,96,8,128,1,float16,fp8,0,0.5390399932861328
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,96,8,128,1,fp8,fp8,0,0.5391295909881592
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,96,96,128,1,fp8,fp8,0,0.32001919746398927
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,96,1,128,1,float16,float16,0,0.3124783992767334
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,96,1,128,1,float16,fp8,0,0.27463040351867674
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,96,1,128,1,fp8,fp8,0,0.2737936019897461
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,96,2,128,1,float16,float16,0,0.31709439754486085
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,96,2,128,1,float16,fp8,0,0.273307204246521
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,96,2,128,1,fp8,fp8,0,0.27406721115112304
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,96,4,128,1,float16,float16,0,0.32315840721130373
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,96,4,128,1,float16,fp8,0,0.27286720275878906
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,96,96,128,1,float16,fp8,0,0.1648687958717346
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,96,4,128,1,fp8,fp8,0,0.27292959690093993
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,96,8,128,1,float16,float16,0,0.3486320018768311
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,96,8,128,1,float16,fp8,0,0.2737519979476929
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,96,8,128,1,fp8,fp8,0,0.2737760066986084
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,96,96,128,1,float16,float16,0,0.3937407970428467
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,96,96,128,1,fp8,fp8,0,0.16507840156555176
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,96,2,128,1,fp8,fp8,0,0.14044640064239503
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,96,1,128,1,float16,float16,0,0.17127840518951415
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,96,1,128,1,float16,fp8,0,0.14039520025253296
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,96,1,128,1,fp8,fp8,0,0.14030879735946655
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,96,2,128,1,float16,float16,0,0.17330399751663209
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,96,2,128,1,float16,fp8,0,0.14059040546417237
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,96,8,128,1,fp8,fp8,0,0.140830397605896
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,96,4,128,1,float16,float16,0,0.17592639923095704
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,96,4,128,1,float16,fp8,0,0.14084160327911377
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,96,4,128,1,fp8,fp8,0,0.14158079624176026
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,96,8,128,1,float16,float16,0,0.18594239950180053
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,96,8,128,1,float16,fp8,0,0.13998559713363648
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,96,96,128,1,float16,float16,0,0.20995359420776366
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,96,96,128,1,float16,fp8,0,0.08688960075378419
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,96,96,128,1,fp8,fp8,0,0.08731039762496948
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,96,2,128,1,fp8,fp8,0,0.07547199726104736
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,96,1,128,1,float16,float16,0,0.09303839802742005
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,96,1,128,1,float16,fp8,0,0.07483680248260498
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,96,1,128,1,fp8,fp8,0,0.07471200227737426
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,96,2,128,1,float16,float16,0,0.09311839938163757
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,96,2,128,1,float16,fp8,0,0.07488800287246704
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,96,4,128,1,float16,float16,0,0.09288319945335388
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,96,4,128,1,float16,fp8,0,0.07506399750709533
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,96,4,128,1,fp8,fp8,0,0.07554240226745605
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,96,8,128,1,float16,float16,0,0.09762719869613648
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,96,8,128,1,float16,fp8,0,0.07481279969215393
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,96,8,128,1,fp8,fp8,0,0.0751200020313263
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,96,96,128,1,float16,float16,0,0.10291680097579955
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,96,96,128,1,float16,fp8,0,0.047860801219940186
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,96,96,128,1,fp8,fp8,0,0.047881600260734555
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,96,2,128,1,fp8,fp8,0,0.042715200781822206
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,96,1,128,1,float16,float16,0,0.05721279978752136
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,96,1,128,1,float16,fp8,0,0.04227199852466583
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,96,1,128,1,fp8,fp8,0,0.04159359931945801
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,96,2,128,1,float16,float16,0,0.057532799243927
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,96,2,128,1,float16,fp8,0,0.042449599504470824
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,96,4,128,1,float16,float16,0,0.0572704017162323
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,96,4,128,1,float16,fp8,0,0.041715198755264284
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,96,4,128,1,fp8,fp8,0,0.042894399166107176
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,96,8,128,1,float16,float16,0,0.05791040062904358
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,96,8,128,1,float16,fp8,0,0.04183520078659057
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,96,8,128,1,fp8,fp8,0,0.04169760048389435
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,96,96,128,1,float16,float16,0,0.05603039860725403
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,96,96,128,1,float16,fp8,0,0.02890079915523529
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,96,96,128,1,fp8,fp8,0,0.02900800108909607
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,96,1,128,1,float16,float16,0,0.03851679861545563
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,96,1,128,1,float16,fp8,0,0.02601439952850342
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,96,1,128,1,fp8,fp8,0,0.02576799988746643
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,96,2,128,1,float16,float16,0,0.03735359907150269
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,96,2,128,1,float16,fp8,0,0.02669920027256012
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,96,2,128,1,fp8,fp8,0,0.025944000482559203
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,96,4,128,1,float16,float16,0,0.037422400712966916
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,96,4,128,1,float16,fp8,0,0.02677280008792877
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,96,4,128,1,fp8,fp8,0,0.026315200328826904
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,96,8,128,1,float16,float16,0,0.03867680132389069
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,96,8,128,1,float16,fp8,0,0.02587360143661499
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,96,8,128,1,fp8,fp8,0,0.02646239995956421
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,96,96,128,1,float16,float16,0,0.035548800230026247
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,96,96,128,1,float16,fp8,0,0.018643200397491455
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,96,96,128,1,fp8,fp8,0,0.018662400543689728
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,96,1,128,1,float16,float16,0,0.02688640058040619
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,96,4,128,1,float16,fp8,0,0.016689600050449373
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,96,1,128,1,float16,fp8,0,0.016628800332546233
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,96,1,128,1,fp8,fp8,0,0.01663679927587509
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,96,2,128,1,float16,float16,0,0.026743999123573302
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,96,2,128,1,float16,fp8,0,0.016667200624942778
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,96,2,128,1,fp8,fp8,0,0.016652800142765045
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,96,4,128,1,float16,float16,0,0.026953598856925963
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,96,4,128,1,fp8,fp8,0,0.016771200299263
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,96,8,128,1,float16,float16,0,0.026868799328804018
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,96,8,128,1,float16,fp8,0,0.016596800088882445
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,96,1,128,1,fp8,fp8,0,0.013142399489879608
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,96,8,128,1,fp8,fp8,0,0.016550399363040924
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,96,96,128,1,float16,fp8,0,0.01454080045223236
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,96,96,128,1,fp8,fp8,0,0.014548799395561219
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,96,96,128,1,float16,float16,0,0.026915198564529418
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,96,4,128,1,float16,fp8,0,0.012591999769210816
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,96,1,128,1,float16,float16,0,0.022758400440216063
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,96,4,128,1,fp8,fp8,0,0.013184000551700593
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,96,1,128,1,float16,fp8,0,0.012785600125789642
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,96,2,128,1,float16,float16,0,0.02268960028886795
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,96,2,128,1,float16,fp8,0,0.013043199479579926
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,96,2,128,1,fp8,fp8,0,0.013049599528312684
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,96,4,128,1,float16,float16,0,0.022707200050354003
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,96,8,128,1,float16,float16,0,0.022886399924755097
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,96,8,128,1,float16,fp8,0,0.013182400166988373
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,96,8,128,1,fp8,fp8,0,0.013043199479579926
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,96,96,128,1,float16,float16,0,0.018680000305175783
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,96,96,128,1,float16,fp8,0,0.010407999902963639
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,96,96,128,1,fp8,fp8,0,0.010476800054311753
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,96,1,128,1,float16,float16,0,0.018806399405002595
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,96,1,128,1,fp8,fp8,0,0.010344000160694122
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,96,2,128,1,float16,float16,0,0.018836799263954162
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,96,8,128,1,float16,float16,0,0.018747200071811677
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,96,1,128,1,float16,fp8,0,0.010407999902963639
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,96,8,128,1,fp8,fp8,0,0.010369600355625152
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,96,2,128,1,float16,fp8,0,0.010489600151777268
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,96,2,128,1,fp8,fp8,0,0.010422399640083313
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,96,4,128,1,float16,float16,0,0.018596799671649934
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,96,4,128,1,float16,fp8,0,0.010416000336408614
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,96,1,128,1,float16,fp8,0,0.4977519989013672
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,96,4,128,1,fp8,fp8,0,0.010550399869680404
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,96,8,128,1,float16,fp8,0,0.010500799864530563
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,96,1,128,1,float16,float16,0,0.5954063892364502
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,96,1,128,1,fp8,fp8,0,0.4977263927459717
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,96,2,128,1,float16,float16,0,0.609174394607544
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,96,2,128,1,float16,fp8,0,0.49707517623901365
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,96,2,128,1,fp8,fp8,0,0.49719038009643557
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,96,4,128,1,float16,float16,0,0.6052847862243652
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,96,4,128,1,float16,fp8,0,0.4981535911560059
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,96,4,128,1,fp8,fp8,0,0.4974063873291016
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,96,8,128,1,float16,fp8,0,0.4969183921813965
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,96,8,128,1,float16,float16,0,0.63023681640625
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,96,8,128,1,fp8,fp8,0,0.49788641929626465
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,96,96,128,1,float16,fp8,0,0.275272011756897
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,96,96,128,1,fp8,fp8,0,0.27510719299316405
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,96,96,128,1,float16,float16,0,0.5442815780639648
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,96,1,128,1,float16,float16,0,0.31136159896850585
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,96,1,128,1,float16,fp8,0,0.2506063938140869
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,96,1,128,1,fp8,fp8,0,0.2508336067199707
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,96,2,128,1,float16,float16,0,0.31314079761505126
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,96,2,128,1,float16,fp8,0,0.2523952007293701
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,96,2,128,1,fp8,fp8,0,0.25128960609436035
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,96,4,128,1,float16,float16,0,0.31684958934783936
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,96,4,128,1,float16,fp8,0,0.2511631965637207
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,96,4,128,1,fp8,fp8,0,0.2511647939682007
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,96,96,128,1,fp8,fp8,0,0.14223359823226928
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,96,8,128,1,float16,float16,0,0.3258944034576416
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,96,8,128,1,float16,fp8,0,0.2505343914031982
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,96,8,128,1,fp8,fp8,0,0.25179200172424315
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,96,96,128,1,float16,float16,0,0.28331680297851564
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,96,96,128,1,float16,fp8,0,0.14216320514678954
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,96,2,128,1,fp8,fp8,0,0.12970080375671386
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,96,1,128,1,float16,float16,0,0.1650928020477295
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,96,1,128,1,float16,fp8,0,0.12978240251541137
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,96,1,128,1,fp8,fp8,0,0.12943199872970582
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,96,2,128,1,float16,float16,0,0.1640687942504883
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,96,2,128,1,float16,fp8,0,0.12942880392074585
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,96,4,128,1,float16,float16,0,0.16774239540100097
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,96,4,128,1,float16,fp8,0,0.1303231954574585
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,96,4,128,1,fp8,fp8,0,0.12985119819641114
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,96,96,128,1,float16,fp8,0,0.07593439817428589
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,96,8,128,1,float16,float16,0,0.17021759748458862
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,96,8,128,1,float16,fp8,0,0.12935680150985718
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,96,8,128,1,fp8,fp8,0,0.12940160036087037
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,96,96,128,1,float16,float16,0,0.13871519565582274
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,96,96,128,1,fp8,fp8,0,0.07574080228805542
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,96,1,128,1,float16,float16,0,0.093641597032547
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,96,1,128,1,float16,fp8,0,0.07010719776153565
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,96,1,128,1,fp8,fp8,0,0.06963840126991272
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,96,2,128,1,float16,float16,0,0.09312480092048644
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,96,2,128,1,float16,fp8,0,0.06947519779205322
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,96,2,128,1,fp8,fp8,0,0.07019360065460205
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,96,4,128,1,float16,float16,0,0.09356639981269836
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,96,4,128,1,float16,fp8,0,0.07001919746398926
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,96,4,128,1,fp8,fp8,0,0.07015039920806884
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,96,8,128,1,float16,float16,0,0.09316480159759521
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,96,8,128,1,float16,fp8,0,0.06966080069541931
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,96,8,128,1,fp8,fp8,0,0.06952959895133973
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,96,96,128,1,float16,float16,0,0.07417280077934266
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,96,96,128,1,float16,fp8,0,0.043217599391937256
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,96,96,128,1,fp8,fp8,0,0.042668798565864564
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,96,1,128,1,float16,float16,0,0.05559200048446655
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,96,1,128,1,float16,fp8,0,0.03939360082149505
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,96,1,128,1,fp8,fp8,0,0.03951520025730133
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,96,2,128,1,float16,float16,0,0.05579040050506592
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,96,2,128,1,float16,fp8,0,0.03929120004177093
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,96,2,128,1,fp8,fp8,0,0.04014239907264709
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,96,4,128,1,float16,float16,0,0.055606400966644286
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,96,4,128,1,float16,fp8,0,0.03997919857501984
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,96,4,128,1,fp8,fp8,0,0.03938240110874176
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,96,8,128,1,float16,float16,0,0.056094402074813844
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,96,8,128,1,float16,fp8,0,0.03940320014953613
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,96,8,128,1,fp8,fp8,0,0.03940800130367279
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,96,96,128,1,float16,float16,0,0.04575360119342804
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,96,96,128,1,float16,fp8,0,0.026043200492858888
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,96,96,128,1,fp8,fp8,0,0.026804798841476442
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,96,1,128,1,float16,float16,0,0.037064000964164734
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,96,1,128,1,float16,fp8,0,0.02481440007686615
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,96,1,128,1,fp8,fp8,0,0.024809600412845613
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,96,2,128,1,float16,float16,0,0.037099200487136844
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,96,2,128,1,float16,fp8,0,0.024884800612926482
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,96,2,128,1,fp8,fp8,0,0.024899199604988098
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,96,4,128,1,float16,float16,0,0.03700799942016601
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,96,4,128,1,float16,fp8,0,0.02475679963827133
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,96,4,128,1,fp8,fp8,0,0.024758400022983552
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,96,8,128,1,float16,float16,0,0.03707680106163025
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,96,8,128,1,float16,fp8,0,0.024932800233364104
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,96,8,128,1,fp8,fp8,0,0.024873599410057068
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,96,96,128,1,float16,float16,0,0.030953601002693176
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,96,96,128,1,float16,fp8,0,0.016577599942684172
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,96,96,128,1,fp8,fp8,0,0.016598400473594666
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,96,1,128,1,float16,float16,0,0.026843199133872987
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,96,1,128,1,float16,fp8,0,0.016680000722408293
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,96,1,128,1,fp8,fp8,0,0.016574400663375854
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,96,2,128,1,float16,float16,0,0.02696320116519928
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,96,2,128,1,fp8,fp8,0,0.016649599373340606
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,96,4,128,1,float16,float16,0,0.02629759907722473
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,96,2,128,1,float16,fp8,0,0.01653439998626709
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,96,4,128,1,float16,fp8,0,0.01671839952468872
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,96,4,128,1,fp8,fp8,0,0.016663999855518342
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,96,8,128,1,float16,float16,0,0.026881599426269533
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,96,8,128,1,float16,fp8,0,0.016595199704170227
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,96,96,128,1,float16,float16,0,0.022771200537681578
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,96,96,128,1,float16,fp8,0,0.012534399330615998
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,96,8,128,1,fp8,fp8,0,0.016596800088882445
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,96,96,128,1,fp8,fp8,0,0.012654399871826172
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,96,1,128,1,float16,float16,0,0.020931200683116914
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,96,1,128,1,float16,fp8,0,0.012636800110340119
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,96,1,128,1,fp8,fp8,0,0.012481600046157837
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,96,2,128,1,float16,fp8,0,0.012593600153923034
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,96,2,128,1,float16,float16,0,0.02272319942712784
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,96,4,128,1,float16,float16,0,0.02277279943227768
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,96,4,128,1,float16,fp8,0,0.012652799487113953
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,96,2,128,1,fp8,fp8,0,0.01257600039243698
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,96,96,128,1,fp8,fp8,0,0.010414399951696397
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,96,4,128,1,fp8,fp8,0,0.01263200044631958
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,96,8,128,1,float16,float16,0,0.02260800004005432
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,96,8,128,1,float16,fp8,0,0.012462399899959564
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,96,8,128,1,fp8,fp8,0,0.012734399735927581
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,96,96,128,1,float16,fp8,0,0.010467199981212616
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,96,96,128,1,float16,float16,0,0.019551999866962433
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,96,1,128,1,float16,float16,0,0.01860480010509491
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,96,1,128,1,float16,fp8,0,0.010390400141477584
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,96,1,128,1,fp8,fp8,0,0.009609600156545639
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,96,2,128,1,float16,float16,0,0.018568000197410582
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,96,2,128,1,float16,fp8,0,0.010502400249242783
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,96,2,128,1,fp8,fp8,0,0.010375999659299851
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,96,4,128,1,float16,float16,0,0.018681600689888
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,96,4,128,1,float16,fp8,0,0.010471999645233154
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,96,4,128,1,fp8,fp8,0,0.010420800000429154
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,96,8,128,1,float16,float16,0,0.018806399405002595
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,96,8,128,1,float16,fp8,0,0.010356800258159637
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,96,8,128,1,fp8,fp8,0,0.009884800016880035
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,64,1,128,1,float16,fp8,0,23.176380920410157
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,64,1,128,1,fp8,fp8,0,23.104678344726562
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,64,2,128,1,float16,fp8,0,23.165989685058594
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,64,2,128,1,fp8,fp8,0,22.995643615722656
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,64,1,128,1,float16,float16,0,29.280728149414063
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,64,2,128,1,float16,float16,0,29.261703491210938
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,64,4,128,1,float16,fp8,0,23.087940979003907
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,64,4,128,1,float16,float16,0,29.755123901367188
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,64,64,128,1,float16,fp8,0,11.855007934570313
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,64,64,128,1,fp8,fp8,0,11.914702606201171
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,64,1,128,1,float16,float16,0,14.607679748535157
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,64,4,128,1,fp8,fp8,0,23.326324462890625
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,64,8,128,1,float16,fp8,0,23.326837158203126
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,64,8,128,1,fp8,fp8,0,23.356593322753906
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,64,64,128,1,float16,float16,0,19.435662841796876
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,64,8,128,1,float16,float16,0,30.594085693359375
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,64,1,128,1,float16,fp8,0,11.626814270019532
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,64,1,128,1,fp8,fp8,0,11.785116577148438
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,64,2,128,1,float16,fp8,0,11.700236511230468
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,64,2,128,1,fp8,fp8,0,11.693732452392577
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,64,2,128,1,float16,float16,0,14.823121643066406
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,64,4,128,1,float16,fp8,0,11.550918579101562
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,64,4,128,1,float16,float16,0,15.075262451171875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,64,4,128,1,fp8,fp8,0,11.609158325195313
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,64,64,128,1,fp8,fp8,0,6.053937530517578
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,64,64,128,1,float16,fp8,0,6.148094558715821
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,64,8,128,1,float16,fp8,0,11.744481658935547
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,64,64,128,1,float16,float16,0,9.435807800292968
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,64,1,128,1,float16,float16,0,7.449495697021485
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,64,8,128,1,fp8,fp8,0,11.808847808837891
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,64,8,128,1,float16,float16,0,15.470848083496094
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,64,1,128,1,float16,fp8,0,5.790091323852539
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,64,1,128,1,fp8,fp8,0,5.823934555053711
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,64,2,128,1,float16,fp8,0,5.828924942016601
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,64,2,128,1,float16,float16,0,7.471153259277344
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,64,2,128,1,fp8,fp8,0,5.791513442993164
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,64,4,128,1,float16,fp8,0,5.845974349975586
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,64,4,128,1,float16,float16,0,7.4765571594238285
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,64,4,128,1,fp8,fp8,0,5.9176513671875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,64,64,128,1,float16,fp8,0,3.0089216232299805
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,64,8,128,1,float16,fp8,0,5.850476837158203
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,64,64,128,1,float16,float16,0,4.764199829101562
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,64,8,128,1,float16,float16,0,7.709849548339844
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,64,64,128,1,fp8,fp8,0,3.052136039733887
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,64,8,128,1,fp8,fp8,0,5.779550552368164
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,64,1,128,1,float16,fp8,0,2.8738208770751954
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,64,1,128,1,fp8,fp8,0,2.91463680267334
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,64,1,128,1,float16,float16,0,3.759318542480469
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,64,2,128,1,float16,float16,0,3.678035354614258
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,64,2,128,1,float16,fp8,0,3.1813215255737304
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,64,2,128,1,fp8,fp8,0,3.1459455490112305
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,64,4,128,1,float16,fp8,0,2.905531120300293
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,64,4,128,1,fp8,fp8,0,2.8578624725341797
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,64,4,128,1,float16,float16,0,3.6180702209472657
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,64,8,128,1,float16,fp8,0,3.207062530517578
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,64,8,128,1,float16,float16,0,3.7557838439941404
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,64,8,128,1,fp8,fp8,0,3.0451311111450194
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,64,1,128,1,float16,fp8,0,13.2624267578125
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,64,1,128,1,fp8,fp8,0,13.453924560546875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,64,2,128,1,float16,fp8,0,13.310212707519531
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,64,2,128,1,fp8,fp8,0,13.319566345214843
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,64,1,128,1,float16,float16,0,16.966999816894532
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,64,4,128,1,float16,fp8,0,13.384629821777343
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,64,2,128,1,float16,float16,0,17.005909729003907
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,64,4,128,1,float16,float16,0,17.48847198486328
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,64,64,128,1,float16,fp8,0,7.10278549194336
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,64,64,128,1,fp8,fp8,0,7.102166748046875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,64,4,128,1,fp8,fp8,0,13.632769775390624
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,64,1,128,1,float16,float16,0,8.551878356933594
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,64,8,128,1,float16,fp8,0,13.611944580078125
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,64,64,128,1,float16,float16,0,11.559225463867188
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,64,8,128,1,fp8,fp8,0,13.603944396972656
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,64,1,128,1,float16,fp8,0,6.653665924072266
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,64,8,128,1,float16,float16,0,17.814659118652344
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,64,1,128,1,fp8,fp8,0,6.562369537353516
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,64,2,128,1,float16,fp8,0,6.765862274169922
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,64,2,128,1,fp8,fp8,0,6.703817749023438
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,64,4,128,1,float16,fp8,0,6.742790222167969
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,64,2,128,1,float16,float16,0,8.533080291748046
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,64,4,128,1,float16,float16,0,8.54714584350586
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,64,4,128,1,fp8,fp8,0,6.705680084228516
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,64,8,128,1,float16,fp8,0,6.6897117614746096
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,64,64,128,1,float16,fp8,0,3.5588096618652343
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,64,64,128,1,fp8,fp8,0,3.8097393035888674
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,64,8,128,1,float16,float16,0,8.973633575439454
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,64,64,128,1,float16,float16,0,5.799900817871094
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,64,1,128,1,float16,fp8,0,3.3243377685546873
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,64,1,128,1,float16,float16,0,4.188375854492188
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,64,8,128,1,fp8,fp8,0,6.704051208496094
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,64,1,128,1,fp8,fp8,0,3.311201477050781
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,64,2,128,1,float16,fp8,0,3.286054229736328
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,64,2,128,1,float16,float16,0,4.141465759277343
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,64,2,128,1,fp8,fp8,0,3.6682880401611326
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,64,4,128,1,float16,fp8,0,3.3260944366455076
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,64,4,128,1,float16,float16,0,3.924630355834961
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,64,4,128,1,fp8,fp8,0,3.8152481079101563
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,64,8,128,1,float16,fp8,0,3.2913280487060548
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,64,8,128,1,float16,float16,0,4.303977584838867
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,64,64,128,1,float16,fp8,0,1.7778383255004884
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,64,64,128,1,fp8,fp8,0,1.8322719573974608
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,64,8,128,1,fp8,fp8,0,3.3441951751708983
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,64,64,128,1,float16,float16,0,3.2845664978027345
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,64,1,128,1,float16,fp8,0,1.7548847198486328
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,64,1,128,1,fp8,fp8,0,1.6960847854614258
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,64,1,128,1,float16,float16,0,2.364169692993164
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,64,2,128,1,float16,float16,0,1.9557151794433594
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,64,2,128,1,float16,fp8,0,1.67542724609375
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,64,2,128,1,fp8,fp8,0,1.757676887512207
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,64,4,128,1,float16,float16,0,2.008513641357422
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,64,4,128,1,float16,fp8,0,1.6817743301391601
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,64,4,128,1,fp8,fp8,0,1.6476287841796875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,64,8,128,1,float16,float16,0,2.1628223419189454
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,64,8,128,1,float16,fp8,0,2.0477872848510743
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,64,8,128,1,fp8,fp8,0,1.6425743103027344
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,64,1,128,1,fp8,fp8,0,9.406009674072266
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,64,1,128,1,float16,fp8,0,9.417958068847657
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,64,2,128,1,fp8,fp8,0,9.371408081054687
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,64,2,128,1,float16,fp8,0,9.517729949951171
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,64,1,128,1,float16,float16,0,12.078963470458984
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,64,2,128,1,float16,float16,0,11.889995574951172
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,64,4,128,1,float16,fp8,0,9.32956771850586
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,64,4,128,1,float16,float16,0,12.229456329345703
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,64,64,128,1,fp8,fp8,0,4.897103881835937
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,64,64,128,1,float16,fp8,0,5.119563293457031
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,64,4,128,1,fp8,fp8,0,9.652556610107421
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,64,1,128,1,float16,float16,0,6.053827285766602
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,64,8,128,1,float16,fp8,0,9.52289276123047
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,64,8,128,1,fp8,fp8,0,9.500137329101562
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,64,64,128,1,float16,float16,0,8.793939208984375
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,64,8,128,1,float16,float16,0,12.588641357421874
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,64,1,128,1,float16,fp8,0,4.793948745727539
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,64,1,128,1,fp8,fp8,0,4.619863891601563
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,64,2,128,1,float16,fp8,0,4.973451232910156
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,64,2,128,1,float16,float16,0,6.010225677490235
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,64,2,128,1,fp8,fp8,0,4.708129501342773
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,64,4,128,1,float16,fp8,0,4.666659164428711
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,64,4,128,1,float16,float16,0,6.169070434570313
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,64,4,128,1,fp8,fp8,0,4.972137451171875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,64,8,128,1,float16,fp8,0,4.643756866455078
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,64,64,128,1,float16,fp8,0,2.5231327056884765
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,64,64,128,1,fp8,fp8,0,2.4571744918823244
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,64,8,128,1,float16,float16,0,5.984574508666992
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,64,8,128,1,fp8,fp8,0,4.686094284057617
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,64,1,128,1,float16,float16,0,3.0776735305786134
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,64,64,128,1,float16,float16,0,4.5750385284423825
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,64,1,128,1,float16,fp8,0,2.4242143630981445
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,64,1,128,1,fp8,fp8,0,2.3281904220581056
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,64,2,128,1,float16,fp8,0,2.315731239318848
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,64,2,128,1,float16,float16,0,2.826192092895508
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,64,2,128,1,fp8,fp8,0,2.692620849609375
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,64,4,128,1,fp8,fp8,0,2.3302783966064453
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,64,4,128,1,float16,float16,0,2.7607376098632814
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,64,8,128,1,float16,fp8,0,2.336319923400879
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,64,4,128,1,float16,fp8,0,2.7011039733886717
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,64,64,128,1,float16,fp8,0,1.3329423904418944
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,64,8,128,1,fp8,fp8,0,2.3618751525878907
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,64,8,128,1,float16,float16,0,2.992001533508301
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,64,64,128,1,float16,float16,0,2.4692079544067385
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,64,64,128,1,fp8,fp8,0,1.3064720153808593
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,64,1,128,1,float16,fp8,0,1.2567407608032226
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,64,1,128,1,fp8,fp8,0,1.2036383628845215
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,64,1,128,1,float16,float16,0,1.5515040397644042
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,64,2,128,1,float16,float16,0,1.3990608215332032
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,64,2,128,1,float16,fp8,0,1.208409595489502
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,64,4,128,1,float16,float16,0,1.4249327659606934
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,64,2,128,1,fp8,fp8,0,1.5566304206848145
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,64,4,128,1,float16,fp8,0,1.1932576179504395
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,64,4,128,1,fp8,fp8,0,1.183631992340088
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,64,8,128,1,float16,float16,0,1.4634672164916993
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,64,8,128,1,float16,fp8,0,1.4978447914123536
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,64,8,128,1,fp8,fp8,0,1.4394864082336425
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,64,1,128,1,fp8,fp8,0,12.30230255126953
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,64,1,128,1,float16,fp8,0,12.504576110839844
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,64,2,128,1,float16,fp8,0,12.315750122070312
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,64,2,128,1,fp8,fp8,0,12.320476531982422
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,64,4,128,1,float16,fp8,0,12.263855743408204
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,64,1,128,1,float16,float16,0,15.732489013671875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,64,2,128,1,float16,float16,0,15.757080078125
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,64,4,128,1,float16,float16,0,16.074055480957032
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,64,64,128,1,float16,fp8,0,6.99522705078125
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,64,64,128,1,fp8,fp8,0,6.673461151123047
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,64,1,128,1,float16,float16,0,7.860598754882813
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,64,4,128,1,fp8,fp8,0,12.584001922607422
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,64,8,128,1,float16,fp8,0,12.522608184814453
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,64,8,128,1,fp8,fp8,0,12.622067260742188
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,64,64,128,1,float16,float16,0,12.196353912353516
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,64,1,128,1,float16,fp8,0,6.094184112548828
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,64,8,128,1,float16,float16,0,16.843031311035155
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,64,1,128,1,fp8,fp8,0,6.082099151611328
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,64,2,128,1,float16,fp8,0,6.212535858154297
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,64,2,128,1,fp8,fp8,0,6.15534553527832
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,64,4,128,1,float16,fp8,0,6.118127822875977
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,64,2,128,1,float16,float16,0,7.913520050048828
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,64,4,128,1,float16,float16,0,8.152721405029297
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,64,4,128,1,fp8,fp8,0,6.1986846923828125
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,64,8,128,1,float16,fp8,0,6.235184097290039
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,64,64,128,1,float16,fp8,0,3.3417152404785155
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,64,64,128,1,fp8,fp8,0,3.6327247619628906
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,64,8,128,1,float16,float16,0,8.258328247070313
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,64,64,128,1,float16,float16,0,6.0010734558105465
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,64,8,128,1,fp8,fp8,0,6.209563064575195
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,64,1,128,1,float16,float16,0,3.729840087890625
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,64,1,128,1,fp8,fp8,0,3.0776016235351564
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,64,1,128,1,float16,fp8,0,3.0663951873779296
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,64,2,128,1,float16,float16,0,3.778116989135742
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,64,2,128,1,float16,fp8,0,3.3794559478759765
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,64,2,128,1,fp8,fp8,0,3.04724006652832
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,64,4,128,1,float16,fp8,0,3.261897659301758
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,64,4,128,1,float16,float16,0,3.8083072662353517
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,64,4,128,1,fp8,fp8,0,3.074515151977539
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,64,8,128,1,float16,float16,0,3.983327865600586
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,64,8,128,1,float16,fp8,0,3.309307098388672
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,64,64,128,1,float16,fp8,0,1.6827280044555664
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,64,64,128,1,fp8,fp8,0,1.9833440780639648
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,64,1,128,1,float16,float16,0,1.782364845275879
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,64,8,128,1,fp8,fp8,0,3.059118461608887
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,64,64,128,1,float16,float16,0,3.008078384399414
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,64,1,128,1,float16,fp8,0,1.5020208358764648
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,64,1,128,1,fp8,fp8,0,1.8765392303466797
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,64,2,128,1,float16,fp8,0,1.5057567596435546
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,64,2,128,1,float16,float16,0,1.799087905883789
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,64,2,128,1,fp8,fp8,0,1.602956771850586
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,64,4,128,1,float16,fp8,0,1.5524527549743652
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,64,4,128,1,fp8,fp8,0,1.538096046447754
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,64,4,128,1,float16,float16,0,2.05035514831543
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,64,8,128,1,float16,float16,0,1.894063949584961
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,64,64,128,1,float16,fp8,0,0.8749055862426758
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,64,8,128,1,float16,fp8,0,1.5407872200012207
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,64,8,128,1,fp8,fp8,0,1.5123791694641113
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,64,64,128,1,fp8,fp8,0,0.8561231613159179
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,64,1,128,1,float16,float16,0,0.9110048294067383
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,64,64,128,1,float16,float16,0,1.6535823822021485
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,64,1,128,1,float16,fp8,0,0.9233695983886718
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,64,1,128,1,fp8,fp8,0,0.9119248390197754
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,64,2,128,1,float16,float16,0,0.9224575996398926
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,64,2,128,1,float16,fp8,0,0.8196479797363281
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,64,2,128,1,fp8,fp8,0,0.7952799797058105
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,64,4,128,1,float16,float16,0,0.9419024467468262
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,64,4,128,1,float16,fp8,0,0.9078479766845703
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,64,4,128,1,fp8,fp8,0,0.8592047691345215
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,64,8,128,1,float16,fp8,0,0.8264880180358887
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,64,8,128,1,float16,float16,0,1.0585840225219727
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,64,8,128,1,fp8,fp8,0,0.8157584190368652
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,64,1,128,1,float16,fp8,0,7.06908950805664
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,64,1,128,1,fp8,fp8,0,7.059272003173828
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,64,2,128,1,fp8,fp8,0,7.035860443115235
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,64,2,128,1,float16,fp8,0,7.084689331054688
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,64,4,128,1,float16,fp8,0,7.142495727539062
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,64,1,128,1,float16,float16,0,9.030348968505859
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,64,2,128,1,float16,float16,0,8.846401977539063
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,64,4,128,1,float16,float16,0,9.353300476074219
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,64,64,128,1,float16,fp8,0,4.113659286499024
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,64,64,128,1,fp8,fp8,0,3.973923110961914
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,64,1,128,1,float16,float16,0,4.400656127929688
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,64,4,128,1,fp8,fp8,0,7.234276580810547
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,64,8,128,1,float16,fp8,0,7.214366149902344
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,64,8,128,1,fp8,fp8,0,7.277378845214844
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,64,64,128,1,float16,float16,0,7.785214233398437
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,64,1,128,1,float16,fp8,0,3.5441471099853517
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,64,8,128,1,float16,float16,0,9.874632263183594
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,64,1,128,1,fp8,fp8,0,3.4925521850585937
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,64,2,128,1,float16,fp8,0,3.5474048614501954
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,64,2,128,1,fp8,fp8,0,3.606572723388672
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,64,2,128,1,float16,float16,0,4.504560089111328
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,64,4,128,1,float16,fp8,0,3.450640106201172
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,64,4,128,1,float16,float16,0,4.749151992797851
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,64,4,128,1,fp8,fp8,0,3.5849071502685548
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,64,8,128,1,float16,fp8,0,3.555683135986328
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,64,64,128,1,float16,fp8,0,2.3491071701049804
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,64,64,128,1,fp8,fp8,0,1.9906896591186523
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,64,8,128,1,float16,float16,0,4.774017715454102
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,64,8,128,1,fp8,fp8,0,3.5947967529296876
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,64,1,128,1,float16,float16,0,2.420867156982422
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,64,64,128,1,float16,float16,0,3.8675968170166017
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,64,1,128,1,float16,fp8,0,1.776478385925293
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,64,1,128,1,fp8,fp8,0,1.7722015380859375
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,64,2,128,1,float16,fp8,0,1.7791711807250976
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,64,2,128,1,float16,float16,0,2.068606376647949
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,64,2,128,1,fp8,fp8,0,2.214588737487793
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,64,4,128,1,float16,float16,0,2.16592960357666
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,64,4,128,1,fp8,fp8,0,1.785148811340332
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,64,4,128,1,float16,fp8,0,1.9673423767089844
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,64,8,128,1,float16,fp8,0,1.7777088165283204
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,64,8,128,1,float16,float16,0,2.309459114074707
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,64,64,128,1,float16,fp8,0,1.0181599617004395
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,64,8,128,1,fp8,fp8,0,1.7525215148925781
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,64,1,128,1,float16,float16,0,1.0423312187194824
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,64,64,128,1,fp8,fp8,0,1.2054240226745605
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,64,1,128,1,float16,fp8,0,0.9279024124145507
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,64,1,128,1,fp8,fp8,0,0.9228351593017579
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,64,64,128,1,float16,float16,0,2.157723236083984
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,64,2,128,1,float16,float16,0,1.1728303909301758
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,64,2,128,1,float16,fp8,0,0.9005743980407714
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,64,2,128,1,fp8,fp8,0,0.9327471733093262
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,64,4,128,1,float16,float16,0,1.099937629699707
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,64,4,128,1,fp8,fp8,0,0.908785629272461
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,64,4,128,1,float16,fp8,0,1.1245408058166504
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,64,8,128,1,float16,fp8,0,0.9045743942260742
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,64,8,128,1,float16,float16,0,1.1407152175903321
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,64,64,128,1,float16,fp8,0,0.5255263805389404
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,64,8,128,1,fp8,fp8,0,1.045907211303711
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,64,64,128,1,fp8,fp8,0,0.5620736122131348
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,64,64,128,1,float16,float16,0,1.006436824798584
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,64,1,128,1,float16,float16,0,0.5470255851745606
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,64,1,128,1,float16,fp8,0,0.58646240234375
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,64,1,128,1,fp8,fp8,0,0.5413951873779297
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,64,2,128,1,float16,float16,0,0.5515647888183594
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,64,2,128,1,float16,fp8,0,0.517574405670166
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,64,2,128,1,fp8,fp8,0,0.4948160171508789
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,64,4,128,1,float16,float16,0,0.5657887935638428
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,64,4,128,1,float16,fp8,0,0.4728640079498291
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,64,4,128,1,fp8,fp8,0,0.48374080657958984
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,64,8,128,1,float16,fp8,0,0.47223677635192873
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,64,8,128,1,float16,float16,0,0.6040448188781739
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,64,8,128,1,fp8,fp8,0,0.49436798095703127
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,64,1,128,1,float16,fp8,0,6.699334716796875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,64,1,128,1,fp8,fp8,0,6.730734252929688
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,64,2,128,1,float16,fp8,0,6.6959678649902346
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,64,2,128,1,fp8,fp8,0,6.647539520263672
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,64,1,128,1,float16,float16,0,8.474361419677734
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,64,2,128,1,float16,float16,0,8.618603515625
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,64,4,128,1,float16,float16,0,8.809166717529298
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,64,4,128,1,float16,fp8,0,6.699180603027344
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,64,64,128,1,float16,fp8,0,4.200619125366211
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,64,4,128,1,fp8,fp8,0,6.720526123046875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,64,8,128,1,float16,fp8,0,6.761319732666015
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,64,8,128,1,fp8,fp8,0,6.696656036376953
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,64,64,128,1,fp8,fp8,0,3.9577072143554686
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,64,1,128,1,float16,float16,0,4.082774353027344
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,64,8,128,1,float16,float16,0,9.46421127319336
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,64,1,128,1,float16,fp8,0,3.2831649780273438
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,64,64,128,1,float16,float16,0,8.757881927490235
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,64,1,128,1,fp8,fp8,0,3.390140914916992
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,64,2,128,1,float16,fp8,0,3.412300872802734
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,64,2,128,1,fp8,fp8,0,3.3748096466064452
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,64,2,128,1,float16,float16,0,4.274051284790039
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,64,4,128,1,float16,float16,0,4.327628707885742
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,64,4,128,1,float16,fp8,0,3.857503890991211
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,64,4,128,1,fp8,fp8,0,3.390428924560547
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,64,8,128,1,float16,fp8,0,3.3991809844970704
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,64,64,128,1,float16,fp8,0,2.4589727401733397
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,64,8,128,1,fp8,fp8,0,3.4375438690185547
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,64,8,128,1,float16,float16,0,4.555807876586914
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,64,64,128,1,fp8,fp8,0,2.341059112548828
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,64,1,128,1,float16,fp8,0,1.6846048355102539
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,64,1,128,1,float16,float16,0,1.960215950012207
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,64,64,128,1,float16,float16,0,4.398606491088867
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,64,1,128,1,fp8,fp8,0,1.7012800216674804
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,64,2,128,1,float16,float16,0,2.0798959732055664
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,64,2,128,1,float16,fp8,0,1.685795211791992
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,64,2,128,1,fp8,fp8,0,1.9336240768432618
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,64,4,128,1,float16,fp8,0,1.6873584747314454
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,64,4,128,1,float16,float16,0,2.041342353820801
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,64,4,128,1,fp8,fp8,0,1.8571584701538086
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,64,8,128,1,float16,fp8,0,1.704697608947754
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,64,64,128,1,float16,fp8,0,0.9814352035522461
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,64,8,128,1,float16,float16,0,2.1944175720214845
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,64,8,128,1,fp8,fp8,0,2.0981903076171875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,64,64,128,1,fp8,fp8,0,0.968727970123291
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,64,64,128,1,float16,float16,0,2.2305696487426756
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,64,1,128,1,float16,fp8,0,0.8888303756713867
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,64,1,128,1,fp8,fp8,0,0.860534381866455
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,64,1,128,1,float16,float16,0,1.132595157623291
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,64,2,128,1,float16,float16,0,0.9872575759887695
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,64,2,128,1,float16,fp8,0,0.9453935623168945
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,64,2,128,1,fp8,fp8,0,1.074948787689209
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,64,4,128,1,float16,float16,0,1.0340831756591797
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,64,4,128,1,float16,fp8,0,0.8400495529174805
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,64,4,128,1,fp8,fp8,0,0.8912495613098145
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,64,8,128,1,float16,float16,0,1.112448024749756
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,64,8,128,1,float16,fp8,0,1.0265423774719238
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,64,8,128,1,fp8,fp8,0,0.8975040435791015
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,64,1,128,1,float16,float16,0,0.5021743774414062
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,64,64,128,1,float16,fp8,0,0.576416015625
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,64,64,128,1,fp8,fp8,0,0.5739967823028564
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,64,1,128,1,float16,fp8,0,0.4388415813446045
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,64,64,128,1,float16,float16,0,1.2580559730529786
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,64,1,128,1,fp8,fp8,0,0.4566336154937744
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,64,4,128,1,float16,float16,0,0.5356656074523926
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,64,2,128,1,float16,fp8,0,0.46454558372497556
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,64,2,128,1,float16,float16,0,0.5785903930664062
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,64,2,128,1,fp8,fp8,0,0.4379248142242432
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,64,4,128,1,float16,fp8,0,0.4640592098236084
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,64,4,128,1,fp8,fp8,0,0.4385712146759033
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,64,8,128,1,float16,float16,0,0.5747136116027832
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,64,8,128,1,float16,fp8,0,0.4408544063568115
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,64,8,128,1,fp8,fp8,0,0.4385551929473877
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,64,64,128,1,float16,fp8,0,0.29831039905548096
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,64,64,128,1,float16,float16,0,0.5890048027038575
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,64,64,128,1,fp8,fp8,0,0.27120959758758545
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,64,1,128,1,float16,float16,0,0.2744175910949707
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,64,1,128,1,float16,fp8,0,0.2384592056274414
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,64,1,128,1,fp8,fp8,0,0.23902080059051514
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,64,2,128,1,float16,float16,0,0.2804879903793335
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,64,2,128,1,float16,fp8,0,0.23904318809509278
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,64,2,128,1,fp8,fp8,0,0.24192800521850585
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,64,4,128,1,float16,float16,0,0.28793280124664306
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,64,4,128,1,float16,fp8,0,0.2383359909057617
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,64,4,128,1,fp8,fp8,0,0.24035999774932862
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,64,8,128,1,float16,float16,0,0.3081952095031738
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,64,8,128,1,float16,fp8,0,0.23834879398345948
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,64,8,128,1,fp8,fp8,0,0.2398751974105835
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,64,1,128,1,float16,fp8,0,3.904451370239258
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,64,1,128,1,fp8,fp8,0,3.9290367126464845
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,64,1,128,1,float16,float16,0,4.912246322631836
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,64,2,128,1,float16,fp8,0,3.929526519775391
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,64,2,128,1,fp8,fp8,0,3.8815040588378906
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,64,2,128,1,float16,float16,0,4.899934387207031
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,64,4,128,1,float16,float16,0,5.059899139404297
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,64,4,128,1,float16,fp8,0,3.9571537017822265
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,64,4,128,1,fp8,fp8,0,3.956243133544922
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,64,8,128,1,float16,fp8,0,3.9634654998779295
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,64,8,128,1,fp8,fp8,0,4.00432014465332
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,64,64,128,1,float16,fp8,0,2.7631504058837892
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,64,64,128,1,fp8,fp8,0,2.315779113769531
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,64,1,128,1,float16,fp8,0,1.9777471542358398
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,64,8,128,1,float16,float16,0,5.849934387207031
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,64,1,128,1,float16,float16,0,2.28460807800293
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,64,1,128,1,fp8,fp8,0,2.048753547668457
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,64,2,128,1,float16,fp8,0,1.9882720947265624
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,64,2,128,1,float16,float16,0,2.442576026916504
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,64,64,128,1,float16,float16,0,6.00855827331543
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,64,2,128,1,fp8,fp8,0,1.960740852355957
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,64,4,128,1,float16,fp8,0,2.0673248291015627
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,64,4,128,1,float16,float16,0,2.451972770690918
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,64,4,128,1,fp8,fp8,0,2.4020015716552736
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,64,64,128,1,float16,fp8,0,1.189742374420166
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,64,8,128,1,float16,fp8,0,1.9651264190673827
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,64,8,128,1,float16,float16,0,2.7654272079467774
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,64,8,128,1,fp8,fp8,0,2.3256208419799806
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,64,64,128,1,fp8,fp8,0,1.2081695556640626
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,64,1,128,1,float16,fp8,0,1.0112575531005858
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,64,1,128,1,fp8,fp8,0,0.9889216423034668
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,64,1,128,1,float16,float16,0,1.4419599533081056
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,64,64,128,1,float16,float16,0,2.9830080032348634
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,64,2,128,1,float16,float16,0,1.188548755645752
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,64,2,128,1,float16,fp8,0,1.161073589324951
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,64,2,128,1,fp8,fp8,0,1.0560383796691895
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,64,4,128,1,float16,fp8,0,0.9884127616882324
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,64,4,128,1,float16,float16,0,1.2450559616088868
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,64,4,128,1,fp8,fp8,0,0.9865504264831543
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,64,8,128,1,float16,fp8,0,0.9859295845031738
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,64,64,128,1,float16,fp8,0,0.6277952194213867
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,64,64,128,1,fp8,fp8,0,0.6041920185089111
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,64,8,128,1,fp8,fp8,0,1.1445551872253419
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,64,8,128,1,float16,float16,0,1.5629520416259766
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,64,1,128,1,fp8,fp8,0,0.5093776226043701
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,64,1,128,1,float16,fp8,0,0.5087823867797852
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,64,1,128,1,float16,float16,0,0.6236480236053467
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,64,2,128,1,float16,float16,0,0.6038000106811523
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,64,64,128,1,float16,float16,0,1.6016351699829101
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,64,2,128,1,float16,fp8,0,0.5092336177825928
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,64,2,128,1,fp8,fp8,0,0.5490767955780029
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,64,4,128,1,float16,float16,0,0.650867223739624
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,64,4,128,1,float16,fp8,0,0.5089856147766113
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,64,4,128,1,fp8,fp8,0,0.5092703819274902
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,64,8,128,1,float16,float16,0,0.7099135875701904
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,64,8,128,1,fp8,fp8,0,0.512611198425293
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,64,64,128,1,float16,fp8,0,0.33034720420837405
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,64,8,128,1,float16,fp8,0,0.5100111961364746
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,64,64,128,1,fp8,fp8,0,0.36902880668640137
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,64,64,128,1,float16,float16,0,0.7759168148040771
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,64,1,128,1,float16,float16,0,0.3128544092178345
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,64,1,128,1,float16,fp8,0,0.2709104061126709
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,64,1,128,1,fp8,fp8,0,0.28075199127197265
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,64,2,128,1,float16,float16,0,0.3205615997314453
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,64,2,128,1,float16,fp8,0,0.2712991952896118
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,64,2,128,1,fp8,fp8,0,0.270958399772644
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,64,4,128,1,float16,float16,0,0.34883201122283936
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,64,4,128,1,float16,fp8,0,0.27295680046081544
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,64,4,128,1,fp8,fp8,0,0.2709007978439331
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,64,8,128,1,float16,float16,0,0.36147360801696776
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,64,8,128,1,float16,fp8,0,0.27295360565185545
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,64,8,128,1,fp8,fp8,0,0.27117919921875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,64,64,128,1,float16,fp8,0,0.17596160173416137
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,64,64,128,1,float16,float16,0,0.4094575881958008
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,64,64,128,1,fp8,fp8,0,0.17675039768218995
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,64,1,128,1,float16,float16,0,0.17786719799041747
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,64,1,128,1,float16,fp8,0,0.1508463978767395
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,64,1,128,1,fp8,fp8,0,0.1506175994873047
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,64,2,128,1,float16,float16,0,0.1818719983100891
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,64,2,128,1,float16,fp8,0,0.1505952000617981
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,64,2,128,1,fp8,fp8,0,0.1505344033241272
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,64,4,128,1,float16,float16,0,0.1869312047958374
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,64,4,128,1,float16,fp8,0,0.15107359886169433
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,64,4,128,1,fp8,fp8,0,0.15329920053482055
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,64,8,128,1,float16,float16,0,0.20266399383544922
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,64,8,128,1,float16,fp8,0,0.15086239576339722
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,64,8,128,1,fp8,fp8,0,0.151147198677063
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,64,1,128,1,float16,fp8,0,3.8242401123046874
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,64,1,128,1,fp8,fp8,0,3.8778656005859373
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,64,2,128,1,float16,fp8,0,3.874612808227539
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,64,1,128,1,float16,float16,0,4.880836868286133
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,64,2,128,1,fp8,fp8,0,3.88256950378418
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,64,4,128,1,float16,fp8,0,3.8756175994873048
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,64,2,128,1,float16,float16,0,4.85070571899414
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,64,4,128,1,float16,float16,0,5.230104064941406
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,64,64,128,1,float16,fp8,0,2.4810672760009767
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,64,4,128,1,fp8,fp8,0,3.914044952392578
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,64,64,128,1,fp8,fp8,0,2.8102304458618166
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,64,1,128,1,float16,float16,0,2.4004304885864256
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,64,8,128,1,float16,fp8,0,3.9317264556884766
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,64,8,128,1,fp8,fp8,0,4.290940856933593
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,64,8,128,1,float16,float16,0,5.871897506713867
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,64,1,128,1,float16,fp8,0,1.9747215270996095
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,64,1,128,1,fp8,fp8,0,1.973214340209961
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,64,2,128,1,float16,fp8,0,1.9718143463134765
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,64,2,128,1,float16,float16,0,2.419193649291992
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,64,2,128,1,fp8,fp8,0,1.9852304458618164
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,64,64,128,1,float16,float16,0,7.134425354003906
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,64,4,128,1,float16,fp8,0,1.9441856384277343
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,64,4,128,1,float16,float16,0,2.5607583999633787
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,64,4,128,1,fp8,fp8,0,1.9299983978271484
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,64,8,128,1,float16,fp8,0,2.177395248413086
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,64,8,128,1,fp8,fp8,0,1.962499237060547
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,64,8,128,1,float16,float16,0,2.8569919586181642
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,64,64,128,1,float16,fp8,0,1.2405743598937988
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,64,1,128,1,float16,float16,0,1.1329055786132813
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,64,64,128,1,fp8,fp8,0,1.4115216255187988
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,64,1,128,1,float16,fp8,0,1.0260479927062989
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,64,1,128,1,fp8,fp8,0,1.0201711654663086
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,64,2,128,1,float16,float16,0,1.1695055961608887
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,64,2,128,1,fp8,fp8,0,1.0001952171325683
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,64,2,128,1,float16,fp8,0,1.119654369354248
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,64,4,128,1,float16,fp8,0,1.0202159881591797
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,64,64,128,1,float16,float16,0,3.5728382110595702
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,64,4,128,1,float16,float16,0,1.2973551750183105
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,64,4,128,1,fp8,fp8,0,0.9957663536071777
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,64,8,128,1,float16,fp8,0,1.0308927536010741
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,64,8,128,1,fp8,fp8,0,0.9777839660644532
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,64,64,128,1,float16,fp8,0,0.6375408172607422
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,64,8,128,1,float16,float16,0,1.4169391632080077
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,64,64,128,1,fp8,fp8,0,0.6230847835540771
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,64,1,128,1,float16,fp8,0,0.5017439842224121
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,64,1,128,1,float16,float16,0,0.6245408058166504
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,64,1,128,1,fp8,fp8,0,0.509438419342041
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,64,64,128,1,float16,float16,0,1.8347328186035157
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,64,2,128,1,float16,float16,0,0.5942319869995117
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,64,2,128,1,float16,fp8,0,0.4983664035797119
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,64,2,128,1,fp8,fp8,0,0.5560495853424072
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,64,4,128,1,float16,float16,0,0.6330031871795654
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,64,4,128,1,float16,fp8,0,0.49930400848388673
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,64,4,128,1,fp8,fp8,0,0.5306223869323731
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,64,8,128,1,float16,fp8,0,0.5012159824371338
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,64,8,128,1,float16,float16,0,0.7137919902801514
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,64,8,128,1,fp8,fp8,0,0.4980288028717041
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,64,64,128,1,float16,fp8,0,0.34040958881378175
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,64,1,128,1,fp8,fp8,0,0.26362080574035646
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,64,64,128,1,fp8,fp8,0,0.3242863893508911
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,64,1,128,1,float16,float16,0,0.3015343904495239
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,64,64,128,1,float16,float16,0,0.9172271728515625
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,64,1,128,1,float16,fp8,0,0.2624527931213379
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,64,2,128,1,float16,float16,0,0.3104448080062866
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,64,2,128,1,float16,fp8,0,0.26137599945068357
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,64,2,128,1,fp8,fp8,0,0.2624000072479248
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,64,8,128,1,float16,fp8,0,0.2628720045089722
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,64,4,128,1,float16,float16,0,0.33149919509887693
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,64,4,128,1,float16,fp8,0,0.26168639659881593
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,64,4,128,1,fp8,fp8,0,0.26270720958709715
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,64,8,128,1,float16,float16,0,0.3707087993621826
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,64,8,128,1,fp8,fp8,0,0.2619231939315796
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,64,64,128,1,float16,fp8,0,0.17544000148773192
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,64,64,128,1,float16,float16,0,0.4767104148864746
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,64,64,128,1,fp8,fp8,0,0.17626880407333373
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,64,1,128,1,float16,float16,0,0.17104640007019042
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,64,1,128,1,float16,fp8,0,0.14320160150527955
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,64,1,128,1,fp8,fp8,0,0.14343199729919434
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,64,2,128,1,float16,float16,0,0.17574880123138428
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,64,2,128,1,float16,fp8,0,0.1428704023361206
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,64,2,128,1,fp8,fp8,0,0.14328960180282593
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,64,4,128,1,float16,float16,0,0.1836527943611145
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,64,4,128,1,float16,fp8,0,0.14380639791488647
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,64,4,128,1,fp8,fp8,0,0.14451520442962645
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,64,8,128,1,float16,float16,0,0.20303680896759033
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,64,8,128,1,float16,fp8,0,0.14436960220336914
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,64,8,128,1,fp8,fp8,0,0.14479680061340333
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,64,64,128,1,float16,float16,0,0.2576303958892822
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,64,64,128,1,float16,fp8,0,0.10144319534301757
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,64,1,128,1,float16,float16,0,0.09988800287246705
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,64,1,128,1,float16,fp8,0,0.08475520014762879
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,64,64,128,1,fp8,fp8,0,0.1013375997543335
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,64,1,128,1,fp8,fp8,0,0.08443520069122315
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,64,4,128,1,fp8,fp8,0,0.08489760160446166
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,64,2,128,1,float16,float16,0,0.1000864028930664
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,64,2,128,1,float16,fp8,0,0.08473280072212219
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,64,2,128,1,fp8,fp8,0,0.08498719930648804
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,64,4,128,1,float16,float16,0,0.10692479610443115
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,64,4,128,1,float16,fp8,0,0.08420640230178833
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,64,8,128,1,float16,float16,0,0.11365920305252075
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,64,8,128,1,float16,fp8,0,0.08449919819831848
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,64,8,128,1,fp8,fp8,0,0.08478720188140869
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,64,1,128,1,float16,fp8,0,2.3940624237060546
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,64,1,128,1,fp8,fp8,0,2.378099250793457
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,64,1,128,1,float16,float16,0,2.869937515258789
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,64,2,128,1,float16,fp8,0,2.380068778991699
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,64,2,128,1,fp8,fp8,0,2.3913248062133787
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,64,2,128,1,float16,float16,0,3.0295072555541993
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,64,4,128,1,float16,fp8,0,2.390795135498047
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,64,4,128,1,float16,float16,0,3.268044662475586
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,64,4,128,1,fp8,fp8,0,2.380419158935547
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,64,64,128,1,float16,fp8,0,1.804654312133789
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,64,8,128,1,fp8,fp8,0,2.387076759338379
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,64,8,128,1,float16,fp8,0,2.815695953369141
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,64,64,128,1,fp8,fp8,0,1.6780336380004883
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,64,8,128,1,float16,float16,0,3.6345504760742187
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,64,1,128,1,float16,float16,0,1.4112768173217773
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,64,1,128,1,float16,fp8,0,1.2649920463562012
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,64,1,128,1,fp8,fp8,0,1.2283072471618652
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,64,2,128,1,float16,fp8,0,1.2127264022827149
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,64,2,128,1,float16,float16,0,1.4481552124023438
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,64,2,128,1,fp8,fp8,0,1.2433199882507324
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,64,64,128,1,float16,float16,0,5.061838531494141
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,64,4,128,1,float16,fp8,0,1.2191151618957519
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,64,4,128,1,fp8,fp8,0,1.1983407974243163
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,64,4,128,1,float16,float16,0,1.6241552352905273
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,64,8,128,1,float16,fp8,0,1.2015904426574706
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,64,8,128,1,fp8,fp8,0,1.292950439453125
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,64,8,128,1,float16,float16,0,1.8019807815551758
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,64,64,128,1,float16,fp8,0,0.7945551872253418
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,64,1,128,1,float16,float16,0,0.7045296192169189
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,64,64,128,1,fp8,fp8,0,0.7952511787414551
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,64,1,128,1,float16,fp8,0,0.7160511970520019
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,64,1,128,1,fp8,fp8,0,0.6117487907409668
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,64,2,128,1,float16,float16,0,0.7462416172027588
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,64,2,128,1,float16,fp8,0,0.6481215953826904
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,64,2,128,1,fp8,fp8,0,0.6094351768493652
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,64,4,128,1,float16,float16,0,0.7919104099273682
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,64,4,128,1,float16,fp8,0,0.6096960067749023
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,64,64,128,1,float16,float16,0,2.543526458740234
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,64,4,128,1,fp8,fp8,0,0.6690688133239746
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,64,8,128,1,float16,fp8,0,0.6558656215667724
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,64,8,128,1,fp8,fp8,0,0.6164624214172363
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,64,8,128,1,float16,float16,0,0.9099920272827149
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,64,64,128,1,float16,fp8,0,0.41211519241333006
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,64,64,128,1,fp8,fp8,0,0.4078847885131836
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,64,1,128,1,float16,float16,0,0.3637200117111206
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,64,1,128,1,float16,fp8,0,0.3286319971084595
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,64,64,128,1,float16,float16,0,1.2823311805725097
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,64,1,128,1,fp8,fp8,0,0.32124640941619875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,64,2,128,1,float16,float16,0,0.3768671989440918
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,64,4,128,1,fp8,fp8,0,0.3188767910003662
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,64,2,128,1,float16,fp8,0,0.31723361015319823
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,64,2,128,1,fp8,fp8,0,0.3244271993637085
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,64,4,128,1,float16,float16,0,0.4075984001159668
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,64,8,128,1,fp8,fp8,0,0.31593921184539797
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,64,4,128,1,float16,fp8,0,0.31558239459991455
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,64,8,128,1,float16,fp8,0,0.31768479347229006
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,64,1,128,1,float16,fp8,0,0.16921759843826295
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,64,8,128,1,float16,float16,0,0.46538238525390624
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,64,64,128,1,float16,fp8,0,0.2156831979751587
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,64,64,128,1,fp8,fp8,0,0.2179647922515869
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,64,64,128,1,float16,float16,0,0.6604496002197265
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,64,1,128,1,float16,float16,0,0.2000351905822754
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,64,1,128,1,fp8,fp8,0,0.1688639998435974
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,64,4,128,1,fp8,fp8,0,0.16920000314712524
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,64,2,128,1,float16,float16,0,0.20636160373687745
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,64,2,128,1,float16,fp8,0,0.16906239986419677
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,64,2,128,1,fp8,fp8,0,0.1689087986946106
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,64,4,128,1,float16,float16,0,0.21987040042877198
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,64,4,128,1,float16,fp8,0,0.1711359977722168
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,64,64,128,1,fp8,fp8,0,0.11966559886932374
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,64,1,128,1,float16,float16,0,0.11638879776000977
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,64,8,128,1,float16,float16,0,0.24881279468536377
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,64,8,128,1,float16,fp8,0,0.1691248059272766
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,64,8,128,1,fp8,fp8,0,0.16948000192642212
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,64,64,128,1,float16,fp8,0,0.12006080150604248
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,64,64,128,1,float16,float16,0,0.346564793586731
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,64,1,128,1,float16,fp8,0,0.09558719992637635
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,64,1,128,1,fp8,fp8,0,0.09504160284996033
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,64,2,128,1,float16,float16,0,0.12154239416122437
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,64,2,128,1,float16,fp8,0,0.09487839937210082
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,64,2,128,1,fp8,fp8,0,0.09526240229606628
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,64,4,128,1,float16,float16,0,0.12862720489501953
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,64,4,128,1,float16,fp8,0,0.09508000016212463
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,64,4,128,1,fp8,fp8,0,0.09467039704322815
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,64,8,128,1,float16,float16,0,0.14430079460144044
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,64,8,128,1,float16,fp8,0,0.09491680264472961
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,64,8,128,1,fp8,fp8,0,0.09474400281906128
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,64,64,128,1,float16,float16,0,0.1889680027961731
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,64,64,128,1,float16,fp8,0,0.06977919936180114
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,64,64,128,1,fp8,fp8,0,0.06927520036697388
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,64,1,128,1,float16,float16,0,0.0722656011581421
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,64,1,128,1,float16,fp8,0,0.05759199857711792
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,64,1,128,1,fp8,fp8,0,0.05770080089569092
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,64,2,128,1,float16,float16,0,0.0719327986240387
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,64,2,128,1,float16,fp8,0,0.057631999254226685
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,64,2,128,1,fp8,fp8,0,0.05760480165481567
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,64,4,128,1,float16,float16,0,0.07609919905662536
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,64,4,128,1,float16,fp8,0,0.05772320032119751
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,64,4,128,1,fp8,fp8,0,0.05773280262947082
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,64,8,128,1,float16,float16,0,0.08144320249557495
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,64,8,128,1,float16,fp8,0,0.057822400331497194
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,64,8,128,1,fp8,fp8,0,0.057817602157592775
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,64,1,128,1,float16,fp8,0,2.525559997558594
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,64,1,128,1,fp8,fp8,0,2.5103647232055666
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,64,1,128,1,float16,float16,0,3.0043664932250977
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,64,2,128,1,float16,fp8,0,2.51287841796875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,64,2,128,1,fp8,fp8,0,2.5141551971435545
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,64,2,128,1,float16,float16,0,3.188105583190918
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,64,4,128,1,float16,fp8,0,2.509499168395996
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,64,4,128,1,float16,float16,0,3.477054214477539
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,64,4,128,1,fp8,fp8,0,2.5332687377929686
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,64,64,128,1,float16,fp8,0,1.752244758605957
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,64,8,128,1,fp8,fp8,0,2.5349824905395506
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,64,8,128,1,float16,fp8,0,2.9921968460083006
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,64,64,128,1,fp8,fp8,0,1.995582389831543
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,64,8,128,1,float16,float16,0,4.089491271972657
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,64,1,128,1,float16,float16,0,1.4652720451354981
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,64,1,128,1,float16,fp8,0,1.263584041595459
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,64,1,128,1,fp8,fp8,0,1.3068032264709473
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,64,2,128,1,float16,float16,0,1.5646143913269044
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,64,2,128,1,float16,fp8,0,1.2797743797302246
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,64,2,128,1,fp8,fp8,0,1.3169216156005858
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,64,4,128,1,float16,fp8,0,1.2630960464477539
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,64,4,128,1,fp8,fp8,0,1.2703871726989746
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,64,4,128,1,float16,float16,0,1.8694160461425782
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,64,8,128,1,fp8,fp8,0,1.2672752380371093
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,64,8,128,1,float16,fp8,0,1.3648799896240233
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,64,64,128,1,float16,fp8,0,0.8901535987854003
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,64,8,128,1,float16,float16,0,2.011952018737793
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,64,64,128,1,float16,float16,0,6.3734382629394535
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,64,1,128,1,float16,float16,0,0.7519472122192383
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,64,64,128,1,fp8,fp8,0,0.8865679740905762
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,64,1,128,1,float16,fp8,0,0.6403391838073731
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,64,1,128,1,fp8,fp8,0,0.6736959934234619
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,64,2,128,1,float16,fp8,0,0.6656303882598877
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,64,2,128,1,float16,float16,0,0.7741392135620118
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,64,2,128,1,fp8,fp8,0,0.6404128074645996
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,64,4,128,1,float16,fp8,0,0.6409023761749267
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,64,4,128,1,fp8,fp8,0,0.6407311916351318
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,64,4,128,1,float16,float16,0,0.8676048278808594
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,64,8,128,1,float16,fp8,0,0.6514080047607422
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,64,64,128,1,float16,float16,0,3.1827823638916017
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,64,8,128,1,float16,float16,0,1.018561553955078
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,64,8,128,1,fp8,fp8,0,0.651423978805542
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,64,64,128,1,float16,fp8,0,0.452561616897583
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,64,64,128,1,fp8,fp8,0,0.4526656150817871
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,64,1,128,1,float16,float16,0,0.3845599889755249
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,64,1,128,1,float16,fp8,0,0.33231680393218993
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,64,1,128,1,fp8,fp8,0,0.3288016080856323
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,64,2,128,1,float16,float16,0,0.39759840965271
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,64,2,128,1,float16,fp8,0,0.33019199371337893
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,64,2,128,1,fp8,fp8,0,0.3285311937332153
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,64,64,128,1,float16,float16,0,1.600067138671875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,64,4,128,1,float16,float16,0,0.4373663902282715
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,64,4,128,1,float16,fp8,0,0.3304863929748535
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,64,4,128,1,fp8,fp8,0,0.3308271884918213
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,64,8,128,1,float16,float16,0,0.5165647983551025
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,64,8,128,1,float16,fp8,0,0.329532790184021
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,64,8,128,1,fp8,fp8,0,0.33063199520111086
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,64,64,128,1,float16,fp8,0,0.2361072063446045
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,64,64,128,1,fp8,fp8,0,0.2363231897354126
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,64,1,128,1,float16,float16,0,0.20555999279022216
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,64,64,128,1,float16,float16,0,0.8145648002624511
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,64,1,128,1,float16,fp8,0,0.17386080026626588
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,64,4,128,1,float16,float16,0,0.23536479473114014
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,64,1,128,1,fp8,fp8,0,0.1734511971473694
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,64,2,128,1,float16,float16,0,0.21303999423980713
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,64,2,128,1,float16,fp8,0,0.17324960231781006
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,64,8,128,1,fp8,fp8,0,0.17351360321044923
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,64,2,128,1,fp8,fp8,0,0.1734752058982849
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,64,4,128,1,float16,fp8,0,0.17379360198974608
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,64,4,128,1,fp8,fp8,0,0.1741968035697937
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,64,1,128,1,float16,float16,0,0.1183743953704834
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,64,8,128,1,float16,float16,0,0.2722383975982666
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,64,8,128,1,float16,fp8,0,0.17554240226745604
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,64,64,128,1,float16,fp8,0,0.12685600519180298
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,64,64,128,1,fp8,fp8,0,0.12781440019607543
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,64,64,128,1,float16,float16,0,0.4220448017120361
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,64,1,128,1,float16,fp8,0,0.09520320296287536
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,64,1,128,1,fp8,fp8,0,0.09550880193710327
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,64,2,128,1,float16,float16,0,0.12515039443969728
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,64,8,128,1,float16,float16,0,0.1525231957435608
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,64,2,128,1,float16,fp8,0,0.09498080015182495
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,64,2,128,1,fp8,fp8,0,0.09619680047035217
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,64,4,128,1,float16,float16,0,0.13294880390167235
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,64,4,128,1,float16,fp8,0,0.09539999961853027
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,64,4,128,1,fp8,fp8,0,0.0952672004699707
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,64,8,128,1,float16,fp8,0,0.09597280025482177
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,64,8,128,1,fp8,fp8,0,0.09577119946479798
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,64,64,128,1,float16,float16,0,0.22529759407043456
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,64,64,128,1,float16,fp8,0,0.07253280282020569
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,64,64,128,1,fp8,fp8,0,0.07294080257415772
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,64,1,128,1,float16,float16,0,0.06999040246009827
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,64,1,128,1,float16,fp8,0,0.05591840147972107
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,64,1,128,1,fp8,fp8,0,0.05567359924316406
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,64,2,128,1,float16,float16,0,0.07028639912605286
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,64,2,128,1,float16,fp8,0,0.055619198083877566
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,64,2,128,1,fp8,fp8,0,0.055883198976516724
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,64,4,128,1,float16,float16,0,0.07540640234947205
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,64,64,128,1,float16,float16,0,0.12321280241012574
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,64,4,128,1,float16,fp8,0,0.0560912013053894
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,64,4,128,1,fp8,fp8,0,0.05562559962272644
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,64,8,128,1,float16,float16,0,0.0825872004032135
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,64,8,128,1,float16,fp8,0,0.05566080212593079
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,64,8,128,1,fp8,fp8,0,0.05560160279273987
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,64,64,128,1,float16,fp8,0,0.04319039881229401
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,64,64,128,1,fp8,fp8,0,0.04326080083847046
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,64,1,128,1,float16,float16,0,0.04931040108203888
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,64,1,128,1,float16,fp8,0,0.03667680025100708
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,64,1,128,1,fp8,fp8,0,0.035041600465774536
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,64,2,128,1,float16,float16,0,0.049395200610160825
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,64,2,128,1,float16,fp8,0,0.03607040047645569
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,64,2,128,1,fp8,fp8,0,0.03637920022010803
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,64,4,128,1,float16,float16,0,0.049619200825691226
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,64,4,128,1,float16,fp8,0,0.0365231990814209
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,64,4,128,1,fp8,fp8,0,0.03601279854774475
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,64,8,128,1,float16,float16,0,0.05370879769325256
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,64,8,128,1,float16,fp8,0,0.035857599973678586
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,64,8,128,1,fp8,fp8,0,0.03588959872722626
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,64,1,128,1,float16,float16,0,2.121931266784668
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,64,1,128,1,float16,fp8,0,1.866551971435547
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,64,1,128,1,fp8,fp8,0,1.8656736373901368
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,64,2,128,1,float16,fp8,0,1.8611936569213867
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,64,2,128,1,fp8,fp8,0,1.8652496337890625
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,64,2,128,1,float16,float16,0,2.3152511596679686
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,64,4,128,1,float16,float16,0,2.62902889251709
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,64,4,128,1,float16,fp8,0,2.0087072372436525
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,64,4,128,1,fp8,fp8,0,1.8632352828979493
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,64,64,128,1,float16,fp8,0,1.4219759941101073
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,64,8,128,1,float16,fp8,0,1.8591600418090821
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,64,8,128,1,fp8,fp8,0,1.8642240524291993
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,64,64,128,1,fp8,fp8,0,1.4471199989318848
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,64,1,128,1,float16,float16,0,1.0638015747070313
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,64,1,128,1,float16,fp8,0,0.9413616180419921
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,64,8,128,1,float16,float16,0,3.26440315246582
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,64,1,128,1,fp8,fp8,0,0.9392607688903809
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,64,2,128,1,float16,float16,0,1.1437487602233887
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,64,2,128,1,float16,fp8,0,0.9392208099365235
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,64,2,128,1,fp8,fp8,0,0.944814395904541
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,64,4,128,1,float16,fp8,0,0.9383968353271485
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,64,4,128,1,float16,float16,0,1.3108863830566406
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,64,4,128,1,fp8,fp8,0,0.9883824348449707
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,64,8,128,1,float16,fp8,0,0.9499456405639648
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,64,8,128,1,fp8,fp8,0,0.9390512466430664
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,64,8,128,1,float16,float16,0,1.6262144088745116
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,64,64,128,1,float16,fp8,0,0.7145088195800782
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,64,1,128,1,float16,float16,0,0.5529391765594482
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,64,1,128,1,float16,fp8,0,0.47882561683654784
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,64,64,128,1,fp8,fp8,0,0.7202159881591796
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,64,1,128,1,fp8,fp8,0,0.4772336006164551
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,64,64,128,1,float16,float16,0,5.948171234130859
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,64,2,128,1,float16,float16,0,0.5842671871185303
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,64,2,128,1,float16,fp8,0,0.47598562240600584
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,64,2,128,1,fp8,fp8,0,0.47614078521728515
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,64,4,128,1,float16,float16,0,0.664799976348877
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,64,4,128,1,float16,fp8,0,0.4761040210723877
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,64,4,128,1,fp8,fp8,0,0.47697601318359373
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,64,64,128,1,float16,float16,0,2.97906551361084
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,64,8,128,1,float16,fp8,0,0.47511677742004393
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,64,8,128,1,fp8,fp8,0,0.47593278884887696
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,64,8,128,1,float16,float16,0,0.8292032241821289
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,64,64,128,1,float16,fp8,0,0.36351039409637453
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,64,64,128,1,fp8,fp8,0,0.36515040397644044
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,64,1,128,1,float16,float16,0,0.2890703916549683
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,64,1,128,1,float16,fp8,0,0.24594399929046631
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,64,1,128,1,fp8,fp8,0,0.24458720684051513
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,64,2,128,1,float16,float16,0,0.30737600326538084
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,64,4,128,1,float16,fp8,0,0.24500799179077148
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,64,2,128,1,float16,fp8,0,0.24582879543304442
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,64,64,128,1,float16,float16,0,1.4989968299865724
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,64,2,128,1,fp8,fp8,0,0.24435040950775147
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,64,4,128,1,float16,float16,0,0.3466304063796997
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,64,4,128,1,fp8,fp8,0,0.24579520225524903
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,64,8,128,1,float16,fp8,0,0.24523038864135743
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,64,8,128,1,float16,float16,0,0.4244815826416016
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,64,8,128,1,fp8,fp8,0,0.2459552049636841
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,64,64,128,1,float16,fp8,0,0.19056639671325684
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,64,64,128,1,fp8,fp8,0,0.1895408034324646
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,64,1,128,1,float16,float16,0,0.16050560474395753
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,64,64,128,1,float16,float16,0,0.7633552074432373
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,64,1,128,1,float16,fp8,0,0.12991360425949097
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,64,1,128,1,fp8,fp8,0,0.12997440099716187
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,64,2,128,1,float16,float16,0,0.1688447952270508
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,64,2,128,1,float16,fp8,0,0.1299471974372864
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,64,2,128,1,fp8,fp8,0,0.13015040159225463
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,64,8,128,1,fp8,fp8,0,0.13000479936599732
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,64,4,128,1,float16,float16,0,0.18807519674301149
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,64,4,128,1,float16,fp8,0,0.1298848032951355
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,64,64,128,1,float16,float16,0,0.3962032079696655
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,64,4,128,1,fp8,fp8,0,0.13122719526290894
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,64,8,128,1,float16,float16,0,0.2282032012939453
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,64,8,128,1,float16,fp8,0,0.1302240014076233
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,64,64,128,1,float16,fp8,0,0.10304479598999024
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,64,2,128,1,float16,fp8,0,0.07210239768028259
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,64,64,128,1,fp8,fp8,0,0.10286719799041748
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,64,1,128,1,float16,float16,0,0.09389280080795288
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,64,1,128,1,float16,fp8,0,0.0711184024810791
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,64,1,128,1,fp8,fp8,0,0.07238559722900391
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,64,8,128,1,float16,float16,0,0.12783679962158204
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,64,2,128,1,float16,float16,0,0.10163999795913696
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,64,2,128,1,fp8,fp8,0,0.0717408001422882
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,64,64,128,1,float16,fp8,0,0.05901920199394226
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,64,4,128,1,float16,float16,0,0.10862239599227905
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,64,4,128,1,float16,fp8,0,0.0727567970752716
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,64,4,128,1,fp8,fp8,0,0.07203360199928284
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,64,8,128,1,float16,fp8,0,0.07232000231742859
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,64,8,128,1,fp8,fp8,0,0.07250400185585022
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,64,64,128,1,float16,float16,0,0.21150240898132325
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,64,1,128,1,float16,float16,0,0.05637120008468628
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,64,64,128,1,fp8,fp8,0,0.05811200141906738
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,64,1,128,1,float16,fp8,0,0.04150879979133606
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,64,1,128,1,fp8,fp8,0,0.042135998606681824
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,64,2,128,1,float16,float16,0,0.05628640055656433
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,64,2,128,1,float16,fp8,0,0.04136959910392761
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,64,2,128,1,fp8,fp8,0,0.04158560037612915
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,64,4,128,1,float16,float16,0,0.061715197563171384
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,64,4,128,1,float16,fp8,0,0.04211359918117523
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,64,4,128,1,fp8,fp8,0,0.04208639860153198
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,64,8,128,1,float16,float16,0,0.0686896026134491
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,64,8,128,1,float16,fp8,0,0.041526401042938234
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,64,8,128,1,fp8,fp8,0,0.041654399037361144
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,64,64,128,1,float16,float16,0,0.11268960237503052
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,64,64,128,1,float16,fp8,0,0.034971201419830324
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,64,64,128,1,fp8,fp8,0,0.034887999296188354
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,64,1,128,1,float16,float16,0,0.03940480053424835
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,64,1,128,1,float16,fp8,0,0.027086400985717775
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,64,1,128,1,fp8,fp8,0,0.02690880000591278
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,64,2,128,1,float16,float16,0,0.04018400013446808
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,64,2,128,1,float16,fp8,0,0.026892799139022826
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,64,2,128,1,fp8,fp8,0,0.026952001452445983
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,64,4,128,1,float16,float16,0,0.03962079882621765
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,64,4,128,1,float16,fp8,0,0.02685439884662628
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,64,4,128,1,fp8,fp8,0,0.02693600058555603
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,64,8,128,1,float16,float16,0,0.045284798741340636
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,64,8,128,1,float16,fp8,0,0.02691679894924164
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,64,8,128,1,fp8,fp8,0,0.027009600400924684
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,64,64,128,1,float16,float16,0,0.05365440249443054
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,64,64,128,1,float16,fp8,0,0.020750400424003602
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,64,64,128,1,fp8,fp8,0,0.02078240066766739
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,64,1,128,1,float16,float16,0,0.028916800022125246
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,64,1,128,1,float16,fp8,0,0.01658399999141693
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,64,1,128,1,fp8,fp8,0,0.016862399876117706
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,64,2,128,1,float16,float16,0,0.029028800129890443
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,64,2,128,1,float16,fp8,0,0.016711999475955964
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,64,2,128,1,fp8,fp8,0,0.01746080070734024
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,64,4,128,1,float16,float16,0,0.028839999437332155
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,64,4,128,1,float16,fp8,0,0.016726399958133697
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,64,4,128,1,fp8,fp8,0,0.016616000235080718
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,64,8,128,1,float16,float16,0,0.02893120050430298
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,64,8,128,1,float16,fp8,0,0.01775040030479431
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,64,8,128,1,fp8,fp8,0,0.017499199509620665
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,64,1,128,1,float16,fp8,0,0.7472544193267823
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,64,1,128,1,float16,float16,0,0.847708797454834
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,64,1,128,1,fp8,fp8,0,0.748195219039917
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,64,2,128,1,float16,fp8,0,0.74617600440979
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,64,2,128,1,float16,float16,0,0.9290351867675781
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,64,2,128,1,fp8,fp8,0,0.746723222732544
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,64,4,128,1,float16,fp8,0,0.7467455863952637
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,64,4,128,1,fp8,fp8,0,0.7454063892364502
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,64,4,128,1,float16,float16,0,1.0878496170043945
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,64,8,128,1,float16,fp8,0,0.7474448204040527
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,64,8,128,1,fp8,fp8,0,0.7459504127502441
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,64,64,128,1,float16,fp8,0,0.6198416233062745
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,64,1,128,1,float16,float16,0,0.43831839561462405
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,64,1,128,1,float16,fp8,0,0.37958240509033203
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,64,8,128,1,float16,float16,0,1.4089920043945312
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,64,64,128,1,fp8,fp8,0,0.617196798324585
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,64,1,128,1,fp8,fp8,0,0.3796015977859497
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,64,2,128,1,float16,float16,0,0.47706241607666017
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,64,2,128,1,float16,fp8,0,0.38034079074859617
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,64,2,128,1,fp8,fp8,0,0.3793503999710083
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,64,4,128,1,float16,float16,0,0.5558335781097412
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,64,4,128,1,float16,fp8,0,0.37955360412597655
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,64,4,128,1,fp8,fp8,0,0.3800384044647217
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,64,8,128,1,fp8,fp8,0,0.3803247928619385
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,64,8,128,1,float16,fp8,0,0.37951040267944336
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,64,64,128,1,float16,float16,0,2.8607919692993162
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,64,8,128,1,float16,float16,0,0.7146687984466553
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,64,64,128,1,float16,fp8,0,0.3137648105621338
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,64,64,128,1,fp8,fp8,0,0.3130032062530518
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,64,1,128,1,float16,float16,0,0.23230400085449218
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,64,1,128,1,float16,fp8,0,0.19505120515823365
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,64,1,128,1,fp8,fp8,0,0.19463200569152833
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,64,2,128,1,float16,float16,0,0.25351519584655763
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,64,2,128,1,float16,fp8,0,0.19527039527893067
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,64,64,128,1,float16,float16,0,1.4404848098754883
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,64,2,128,1,fp8,fp8,0,0.1950719952583313
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,64,4,128,1,float16,float16,0,0.2904560089111328
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,64,4,128,1,float16,fp8,0,0.1952031970024109
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,64,64,128,1,float16,fp8,0,0.16211520433425902
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,64,4,128,1,fp8,fp8,0,0.19509600400924682
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,64,8,128,1,float16,float16,0,0.37008960247039796
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,64,8,128,1,float16,fp8,0,0.19564160108566284
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,64,8,128,1,fp8,fp8,0,0.19527519941329957
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,64,64,128,1,fp8,fp8,0,0.16209919452667237
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,64,1,128,1,float16,float16,0,0.13084319829940796
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,64,1,128,1,float16,fp8,0,0.10272799730300904
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,64,64,128,1,float16,float16,0,0.7338655948638916
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,64,1,128,1,fp8,fp8,0,0.10234880447387695
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,64,2,128,1,float16,float16,0,0.13978240489959717
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,64,2,128,1,float16,fp8,0,0.10276319980621337
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,64,2,128,1,fp8,fp8,0,0.10316319465637207
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,64,4,128,1,float16,float16,0,0.15859999656677246
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,64,4,128,1,float16,fp8,0,0.10272799730300904
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,64,4,128,1,fp8,fp8,0,0.10304479598999024
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,64,8,128,1,float16,float16,0,0.19774240255355835
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,64,8,128,1,float16,fp8,0,0.1029520034790039
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,64,8,128,1,fp8,fp8,0,0.10299999713897705
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,64,64,128,1,float16,fp8,0,0.08793759942054749
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,64,64,128,1,float16,float16,0,0.3797247886657715
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,64,64,128,1,fp8,fp8,0,0.0866815984249115
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,64,1,128,1,float16,float16,0,0.07614079713821412
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,64,1,128,1,float16,fp8,0,0.05551999807357788
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,64,1,128,1,fp8,fp8,0,0.055632001161575316
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,64,2,128,1,float16,float16,0,0.08252639770507812
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,64,2,128,1,float16,fp8,0,0.0554639995098114
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,64,2,128,1,fp8,fp8,0,0.055460798740386966
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,64,4,128,1,float16,float16,0,0.09090240001678467
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,64,4,128,1,float16,fp8,0,0.05576800107955933
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,64,4,128,1,fp8,fp8,0,0.05549280047416687
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,64,8,128,1,float16,float16,0,0.11084799766540528
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,64,8,128,1,float16,fp8,0,0.05596320033073425
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,64,8,128,1,fp8,fp8,0,0.05679519772529602
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,64,64,128,1,float16,float16,0,0.20342719554901123
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,64,64,128,1,float16,fp8,0,0.050620800256729125
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,64,64,128,1,fp8,fp8,0,0.05042719841003418
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,64,1,128,1,float16,float16,0,0.046744000911712644
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,64,1,128,1,float16,fp8,0,0.03421440124511719
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,64,1,128,1,fp8,fp8,0,0.034441599249839784
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,64,2,128,1,float16,float16,0,0.047286400198936464
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,64,2,128,1,float16,fp8,0,0.034355199337005614
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,64,2,128,1,fp8,fp8,0,0.03377600014209747
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,64,4,128,1,float16,float16,0,0.054425597190856934
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,64,4,128,1,float16,fp8,0,0.033934399485588074
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,64,4,128,1,fp8,fp8,0,0.03410719931125641
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,64,8,128,1,float16,float16,0,0.06125760078430176
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,64,8,128,1,float16,fp8,0,0.034148800373077395
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,64,8,128,1,fp8,fp8,0,0.03454079926013946
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,64,64,128,1,float16,float16,0,0.10395679473876954
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,64,64,128,1,float16,fp8,0,0.028830400109291075
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,64,2,128,1,fp8,fp8,0,0.020656000077724456
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,64,64,128,1,fp8,fp8,0,0.02881760001182556
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,64,1,128,1,float16,float16,0,0.0331743985414505
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,64,1,128,1,float16,fp8,0,0.020737600326538087
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,64,1,128,1,fp8,fp8,0,0.020772799849510193
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,64,2,128,1,float16,float16,0,0.03312639892101288
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,64,2,128,1,float16,fp8,0,0.0207056000828743
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,64,4,128,1,float16,fp8,0,0.020683200657367708
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,64,4,128,1,fp8,fp8,0,0.020612800121307374
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,64,4,128,1,float16,float16,0,0.03311359882354736
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,64,8,128,1,float16,float16,0,0.03815839886665344
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,64,8,128,1,float16,fp8,0,0.020788800716400147
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,64,8,128,1,fp8,fp8,0,0.020768000185489653
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,64,64,128,1,float16,float16,0,0.05145919919013977
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,64,64,128,1,float16,fp8,0,0.018566399812698364
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,64,64,128,1,fp8,fp8,0,0.01857759952545166
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,64,1,128,1,float16,float16,0,0.026465600728988646
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,64,1,128,1,float16,fp8,0,0.014504000544548035
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,64,1,128,1,fp8,fp8,0,0.014604799449443817
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,64,8,128,1,float16,float16,0,0.026841598749160766
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,64,2,128,1,float16,float16,0,0.024886399507522583
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,64,2,128,1,float16,fp8,0,0.014523200690746307
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,64,2,128,1,fp8,fp8,0,0.014519999921321868
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,64,4,128,1,float16,float16,0,0.02491360008716583
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,64,4,128,1,float16,fp8,0,0.014563199877738953
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,64,4,128,1,fp8,fp8,0,0.01451359987258911
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,64,8,128,1,float16,fp8,0,0.014539200067520141
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,64,8,128,1,fp8,fp8,0,0.014502400159835815
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,64,64,128,1,float16,float16,0,0.034753599762916566
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,64,64,128,1,float16,fp8,0,0.01451359987258911
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,64,2,128,1,fp8,fp8,0,0.012567999958992004
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,64,64,128,1,fp8,fp8,0,0.015718400478363037
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,64,1,128,1,float16,float16,0,0.022758400440216063
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,64,1,128,1,float16,fp8,0,0.013760000467300415
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,64,1,128,1,fp8,fp8,0,0.012569600343704223
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,64,2,128,1,float16,float16,0,0.022711999714374542
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,64,2,128,1,float16,fp8,0,0.013963200151920319
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,64,4,128,1,float16,float16,0,0.02279520034790039
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,64,4,128,1,float16,fp8,0,0.012729600071907043
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,64,4,128,1,fp8,fp8,0,0.013875199854373932
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,64,8,128,1,float16,float16,0,0.02335360050201416
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,64,8,128,1,float16,fp8,0,0.014571200311183929
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,64,8,128,1,fp8,fp8,0,0.013208000361919403
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,64,1,128,1,fp8,fp8,0,0.46528801918029783
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,64,1,128,1,float16,float16,0,0.5296527862548828
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,64,1,128,1,float16,fp8,0,0.4647503852844238
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,64,2,128,1,float16,float16,0,0.5686351776123046
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,64,2,128,1,float16,fp8,0,0.46433439254760744
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,64,2,128,1,fp8,fp8,0,0.4640336036682129
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,64,4,128,1,float16,fp8,0,0.46321759223937986
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,64,4,128,1,float16,float16,0,0.6462944030761719
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,64,4,128,1,fp8,fp8,0,0.4638976097106934
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,64,8,128,1,float16,fp8,0,0.4630447864532471
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,64,8,128,1,float16,float16,0,0.806608009338379
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,64,8,128,1,fp8,fp8,0,0.4632927894592285
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,64,64,128,1,float16,fp8,0,0.3559567928314209
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,64,1,128,1,fp8,fp8,0,0.23798720836639403
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,64,64,128,1,fp8,fp8,0,0.3561232089996338
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,64,1,128,1,float16,float16,0,0.2776959896087646
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,64,1,128,1,float16,fp8,0,0.23809120655059815
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,64,2,128,1,float16,float16,0,0.2982800006866455
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,64,2,128,1,float16,fp8,0,0.23803999423980712
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,64,2,128,1,fp8,fp8,0,0.23807520866394044
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,64,64,128,1,float16,float16,0,1.479956817626953
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,64,4,128,1,float16,float16,0,0.33568320274353025
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,64,4,128,1,float16,fp8,0,0.2377232074737549
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,64,4,128,1,fp8,fp8,0,0.23787999153137207
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,64,8,128,1,float16,float16,0,0.4130943775177002
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,64,8,128,1,float16,fp8,0,0.23752961158752442
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,64,8,128,1,fp8,fp8,0,0.2373215913772583
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,64,64,128,1,float16,fp8,0,0.18315039873123168
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,64,64,128,1,fp8,fp8,0,0.18259840011596679
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,64,1,128,1,float16,float16,0,0.15390559434890747
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,64,64,128,1,float16,float16,0,0.7522784233093261
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,64,2,128,1,fp8,fp8,0,0.12401599884033203
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,64,4,128,1,float16,float16,0,0.18147039413452148
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,64,1,128,1,float16,fp8,0,0.1243664026260376
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,64,1,128,1,fp8,fp8,0,0.12476639747619629
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,64,2,128,1,float16,float16,0,0.1621135950088501
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,64,2,128,1,float16,fp8,0,0.12373759746551513
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,64,4,128,1,float16,fp8,0,0.1237104058265686
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,64,4,128,1,fp8,fp8,0,0.12386239767074585
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,64,8,128,1,float16,float16,0,0.22031679153442382
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,64,8,128,1,float16,fp8,0,0.12474559545516968
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,64,8,128,1,fp8,fp8,0,0.12475680112838745
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,64,64,128,1,float16,fp8,0,0.09655519723892211
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,64,64,128,1,float16,float16,0,0.38838720321655273
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,64,64,128,1,fp8,fp8,0,0.0965839982032776
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,64,1,128,1,float16,float16,0,0.08883519768714905
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,64,1,128,1,float16,fp8,0,0.0662064015865326
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,64,1,128,1,fp8,fp8,0,0.0664896011352539
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,64,2,128,1,float16,float16,0,0.09525600075721741
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,64,2,128,1,float16,fp8,0,0.06664959788322448
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,64,2,128,1,fp8,fp8,0,0.0668063998222351
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,64,4,128,1,float16,float16,0,0.10330400466918946
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,64,4,128,1,float16,fp8,0,0.06690719723701477
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,64,4,128,1,fp8,fp8,0,0.06630880236625672
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,64,8,128,1,float16,float16,0,0.12262239456176757
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,64,8,128,1,float16,fp8,0,0.06761599779129028
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,64,8,128,1,fp8,fp8,0,0.06660799980163574
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,64,64,128,1,float16,float16,0,0.20643839836120606
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,64,64,128,1,float16,fp8,0,0.05347679853439331
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,64,64,128,1,fp8,fp8,0,0.053539198637008664
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,64,1,128,1,float16,float16,0,0.05133439898490906
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,64,1,128,1,float16,fp8,0,0.037110400199890134
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,64,1,128,1,fp8,fp8,0,0.03719840049743652
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,64,4,128,1,fp8,fp8,0,0.03712159991264343
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,64,2,128,1,float16,float16,0,0.05265439748764038
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,64,2,128,1,fp8,fp8,0,0.03711200058460236
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,64,4,128,1,float16,float16,0,0.05958399772644043
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,64,64,128,1,float16,fp8,0,0.030939200520515443
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,64,2,128,1,float16,fp8,0,0.03714720010757446
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,64,4,128,1,float16,fp8,0,0.03707360029220581
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,64,8,128,1,float16,float16,0,0.06799039840698243
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,64,8,128,1,float16,fp8,0,0.03714239895343781
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,64,8,128,1,fp8,fp8,0,0.03710399866104126
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,64,64,128,1,float16,float16,0,0.10969599485397338
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,64,1,128,1,float16,float16,0,0.03622080087661743
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,64,1,128,1,float16,fp8,0,0.02287680059671402
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,64,1,128,1,fp8,fp8,0,0.02274879962205887
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,64,64,128,1,fp8,fp8,0,0.03089759945869446
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,64,2,128,1,float16,float16,0,0.0371535986661911
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,64,2,128,1,float16,fp8,0,0.02288320064544678
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,64,2,128,1,fp8,fp8,0,0.022951999306678773
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,64,4,128,1,float16,float16,0,0.03701280057430267
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,64,4,128,1,float16,fp8,0,0.022780799865722658
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,64,4,128,1,fp8,fp8,0,0.02295999974012375
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,64,8,128,1,float16,float16,0,0.04162560105323791
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,64,8,128,1,float16,fp8,0,0.022710399329662324
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,64,8,128,1,fp8,fp8,0,0.022998400032520294
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,64,64,128,1,float16,float16,0,0.05168160200119019
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,64,64,128,1,float16,fp8,0,0.01876160055398941
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,64,64,128,1,fp8,fp8,0,0.018695999681949616
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,64,1,128,1,float16,float16,0,0.02685759961605072
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,64,1,128,1,float16,fp8,0,0.014620800316333771
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,64,1,128,1,fp8,fp8,0,0.015182399749755859
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,64,2,128,1,float16,float16,0,0.026876801252365114
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,64,8,128,1,float16,fp8,0,0.014988799393177033
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,64,2,128,1,float16,fp8,0,0.014580799639225006
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,64,2,128,1,fp8,fp8,0,0.014535999298095703
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,64,4,128,1,float16,float16,0,0.02701599895954132
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,64,4,128,1,float16,fp8,0,0.014550399780273438
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,64,4,128,1,fp8,fp8,0,0.014579200744628906
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,64,8,128,1,float16,float16,0,0.027008000016212463
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,64,8,128,1,fp8,fp8,0,0.01459999978542328
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,64,64,128,1,float16,float16,0,0.03300159871578216
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,64,64,128,1,float16,fp8,0,0.012595200538635254
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,64,64,128,1,fp8,fp8,0,0.01255200058221817
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,64,1,128,1,float16,float16,0,0.020688000321388244
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,64,1,128,1,float16,fp8,0,0.010582400113344192
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,64,1,128,1,fp8,fp8,0,0.01061599999666214
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,64,8,128,1,float16,float16,0,0.02237119972705841
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,64,2,128,1,float16,float16,0,0.020921599864959717
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,64,2,128,1,float16,fp8,0,0.010503999888896942
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,64,2,128,1,fp8,fp8,0,0.01048320010304451
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,64,4,128,1,float16,float16,0,0.020657600462436677
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,64,4,128,1,float16,fp8,0,0.010478399693965912
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,64,4,128,1,fp8,fp8,0,0.0104592002928257
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,64,8,128,1,float16,fp8,0,0.010555200278759003
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,64,1,128,1,fp8,fp8,0,0.010499200224876404
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,64,8,128,1,fp8,fp8,0,0.010411199927330018
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,64,2,128,1,float16,fp8,0,0.010454399883747101
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,64,64,128,1,float16,float16,0,0.024889600276947022
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,64,64,128,1,float16,fp8,0,0.011844799667596818
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,64,64,128,1,fp8,fp8,0,0.010585600137710571
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,64,1,128,1,float16,float16,0,0.01870400011539459
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,64,1,128,1,float16,fp8,0,0.010492800176143647
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,64,2,128,1,float16,float16,0,0.02067199945449829
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,64,2,128,1,fp8,fp8,0,0.010470400005578995
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,64,4,128,1,float16,float16,0,0.019233599305152893
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,64,4,128,1,float16,fp8,0,0.010478399693965912
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,64,4,128,1,fp8,fp8,0,0.010539200156927109
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,64,8,128,1,float16,float16,0,0.01867839992046356
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,64,8,128,1,float16,fp8,0,0.010492800176143647
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,64,8,128,1,fp8,fp8,0,0.010518400371074677
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,64,1,128,1,float16,float16,0,0.44561119079589845
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,64,1,128,1,fp8,fp8,0,0.38555359840393066
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,64,1,128,1,float16,fp8,0,0.3853247880935669
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,64,2,128,1,float16,float16,0,0.4660031795501709
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,64,2,128,1,float16,fp8,0,0.38491361141204833
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,64,2,128,1,fp8,fp8,0,0.3852463960647583
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,64,4,128,1,float16,float16,0,0.5049071788787842
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,64,4,128,1,float16,fp8,0,0.38416318893432616
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,64,4,128,1,fp8,fp8,0,0.38476641178131105
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,64,8,128,1,float16,float16,0,0.583568000793457
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,64,64,128,1,float16,fp8,0,0.25570878982543943
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,64,8,128,1,float16,fp8,0,0.3843440055847168
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,64,64,128,1,float16,float16,0,0.8391008377075195
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,64,8,128,1,fp8,fp8,0,0.3842479944229126
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,64,64,128,1,fp8,fp8,0,0.25643680095672605
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,64,1,128,1,float16,float16,0,0.23833439350128174
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,64,1,128,1,float16,fp8,0,0.19728959798812867
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,64,1,128,1,fp8,fp8,0,0.19748799800872802
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,64,2,128,1,float16,fp8,0,0.19778079986572267
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,64,2,128,1,float16,float16,0,0.2448944091796875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,64,2,128,1,fp8,fp8,0,0.19733599424362183
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,64,8,128,1,float16,float16,0,0.30322880744934083
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,64,4,128,1,float16,float16,0,0.2648128032684326
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,64,4,128,1,float16,fp8,0,0.197160005569458
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,64,4,128,1,fp8,fp8,0,0.19728319644927977
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,64,8,128,1,float16,fp8,0,0.1978592038154602
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,64,8,128,1,fp8,fp8,0,0.197270405292511
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,64,64,128,1,float16,fp8,0,0.13383040428161622
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,64,64,128,1,float16,float16,0,0.4321743965148926
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,64,64,128,1,fp8,fp8,0,0.13394240140914918
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,64,1,128,1,float16,float16,0,0.12952959537506104
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,64,4,128,1,float16,float16,0,0.14528800249099733
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,64,1,128,1,float16,fp8,0,0.10358239412307739
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,64,1,128,1,fp8,fp8,0,0.10419199466705323
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,64,2,128,1,float16,fp8,0,0.10357439517974854
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,64,2,128,1,float16,float16,0,0.13860960006713868
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,64,2,128,1,fp8,fp8,0,0.10319360494613647
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,64,4,128,1,float16,fp8,0,0.10345120429992676
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,64,4,128,1,fp8,fp8,0,0.10365920066833496
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,64,8,128,1,float16,float16,0,0.16505119800567628
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,64,8,128,1,float16,fp8,0,0.10374079942703247
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,64,8,128,1,fp8,fp8,0,0.10376800298690796
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,64,64,128,1,float16,float16,0,0.2255631923675537
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,64,64,128,1,float16,fp8,0,0.07215359807014465
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,64,64,128,1,fp8,fp8,0,0.07201120257377625
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,64,1,128,1,float16,float16,0,0.073852801322937
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,64,1,128,1,float16,fp8,0,0.05610560178756714
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,64,1,128,1,fp8,fp8,0,0.05615519881248474
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,64,2,128,1,float16,float16,0,0.07450720071792602
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,64,8,128,1,float16,float16,0,0.08949599862098694
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,64,2,128,1,float16,fp8,0,0.05568640232086182
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,64,2,128,1,fp8,fp8,0,0.05604640245437622
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,64,4,128,1,float16,float16,0,0.08157119750976563
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,64,4,128,1,float16,fp8,0,0.055961602926254274
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,64,4,128,1,fp8,fp8,0,0.056174397468566895
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,64,8,128,1,float16,fp8,0,0.056139200925827026
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,64,8,128,1,fp8,fp8,0,0.05565599799156189
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,64,64,128,1,float16,fp8,0,0.039084801077842714
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,64,64,128,1,fp8,fp8,0,0.03931199908256531
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,64,64,128,1,float16,float16,0,0.12101600170135499
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,64,1,128,1,float16,float16,0,0.046430400013923644
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,64,1,128,1,float16,fp8,0,0.03261759877204895
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,64,1,128,1,fp8,fp8,0,0.032267200946807864
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,64,2,128,1,float16,float16,0,0.04705280065536499
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,64,2,128,1,float16,fp8,0,0.03134720027446747
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,64,2,128,1,fp8,fp8,0,0.032020801305770875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,64,4,128,1,float16,float16,0,0.047302401065826415
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,64,4,128,1,float16,fp8,0,0.03260639905929565
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,64,4,128,1,fp8,fp8,0,0.03208320140838623
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,64,8,128,1,float16,float16,0,0.051344001293182374
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,64,8,128,1,float16,fp8,0,0.03149920105934143
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,64,8,128,1,fp8,fp8,0,0.032969599962234496
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,64,64,128,1,float16,float16,0,0.05770080089569092
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,64,64,128,1,float16,fp8,0,0.024740800261497498
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,64,1,128,1,float16,float16,0,0.033139199018478394
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,64,64,128,1,fp8,fp8,0,0.024743999540805816
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,64,1,128,1,float16,fp8,0,0.020670400559902193
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,64,1,128,1,fp8,fp8,0,0.020632000267505647
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,64,2,128,1,float16,float16,0,0.0330128014087677
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,64,2,128,1,float16,fp8,0,0.020710399746894835
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,64,2,128,1,fp8,fp8,0,0.020771199464797975
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,64,4,128,1,float16,float16,0,0.033118399977684024
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,64,4,128,1,float16,fp8,0,0.020768000185489653
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,64,4,128,1,fp8,fp8,0,0.020641599595546723
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,64,8,128,1,float16,float16,0,0.03312639892101288
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,64,8,128,1,float16,fp8,0,0.020732800662517547
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,64,8,128,1,fp8,fp8,0,0.0208079993724823
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,64,64,128,1,float16,float16,0,0.03598720133304596
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,64,64,128,1,float16,fp8,0,0.016616000235080718
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,64,64,128,1,fp8,fp8,0,0.016542400419712066
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,64,1,128,1,float16,float16,0,0.0248416006565094
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,64,1,128,1,float16,fp8,0,0.014502400159835815
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,64,1,128,1,fp8,fp8,0,0.014496000111103058
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,64,2,128,1,float16,float16,0,0.02478239983320236
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,64,2,128,1,float16,fp8,0,0.014529600739479065
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,64,2,128,1,fp8,fp8,0,0.014534400403499603
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,64,4,128,1,float16,float16,0,0.024825599789619446
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,64,4,128,1,float16,fp8,0,0.01449279934167862
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,64,4,128,1,fp8,fp8,0,0.014560000598430633
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,64,8,128,1,float16,float16,0,0.025012800097465517
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,64,8,128,1,float16,fp8,0,0.01454080045223236
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,64,8,128,1,fp8,fp8,0,0.014446400105953217
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,64,64,128,1,float16,float16,0,0.025787198543548585
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,64,64,128,1,float16,fp8,0,0.010500799864530563
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,64,64,128,1,fp8,fp8,0,0.010609599947929382
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,64,1,128,1,float16,float16,0,0.020729599893093108
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,64,1,128,1,float16,fp8,0,0.01048479974269867
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,64,1,128,1,fp8,fp8,0,0.010489600151777268
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,64,2,128,1,float16,float16,0,0.02061759978532791
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,64,2,128,1,float16,fp8,0,0.010471999645233154
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,64,2,128,1,fp8,fp8,0,0.010473600029945374
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,64,4,128,1,float16,float16,0,0.0199072003364563
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,64,64,128,1,float16,float16,0,0.0207056000828743
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,64,4,128,1,float16,fp8,0,0.010454399883747101
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,64,64,128,1,float16,fp8,0,0.010406400263309478
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,64,8,128,1,float16,float16,0,0.020659199357032774
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,64,4,128,1,fp8,fp8,0,0.010443200170993806
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,64,1,128,1,fp8,fp8,0,0.010414399951696397
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,64,8,128,1,float16,fp8,0,0.01048159971833229
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,64,2,128,1,float16,float16,0,0.018695999681949616
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,64,8,128,1,fp8,fp8,0,0.010396800190210342
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,64,2,128,1,fp8,fp8,0,0.010364799946546554
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,64,1,128,1,float16,float16,0,0.01882079988718033
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,64,64,128,1,fp8,fp8,0,0.010491199791431427
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,64,1,128,1,float16,fp8,0,0.010419200360774993
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,64,2,128,1,float16,fp8,0,0.010555200278759003
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,64,4,128,1,float16,float16,0,0.018628799915313722
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,64,4,128,1,float16,fp8,0,0.010476800054311753
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,64,4,128,1,fp8,fp8,0,0.010425599664449692
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,64,8,128,1,float16,float16,0,0.018796800076961516
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,64,8,128,1,float16,fp8,0,0.01037919968366623
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,64,8,128,1,fp8,fp8,0,0.01040000021457672
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,64,1,128,1,float16,float16,0,0.412286376953125
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,64,1,128,1,float16,fp8,0,0.3510416030883789
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,64,1,128,1,fp8,fp8,0,0.35184640884399415
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,64,2,128,1,float16,float16,0,0.4242688179016113
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,64,2,128,1,float16,fp8,0,0.3521775960922241
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,64,2,128,1,fp8,fp8,0,0.35088961124420165
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,64,4,128,1,float16,float16,0,0.43581280708312986
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,64,4,128,1,float16,fp8,0,0.35238559246063234
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,64,4,128,1,fp8,fp8,0,0.3527888059616089
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,64,8,128,1,float16,float16,0,0.47426400184631345
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,64,8,128,1,float16,fp8,0,0.3524832010269165
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,64,8,128,1,fp8,fp8,0,0.3511375904083252
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,64,64,128,1,float16,fp8,0,0.2097071886062622
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,64,64,128,1,fp8,fp8,0,0.2094496011734009
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,64,2,128,1,float16,float16,0,0.22878561019897461
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,64,64,128,1,float16,float16,0,0.5193024158477784
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,64,1,128,1,float16,float16,0,0.21611359119415283
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,64,1,128,1,float16,fp8,0,0.17944799661636351
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,64,1,128,1,fp8,fp8,0,0.178713595867157
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,64,2,128,1,float16,fp8,0,0.17893600463867188
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,64,2,128,1,fp8,fp8,0,0.17943520545959474
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,64,4,128,1,float16,float16,0,0.2311408042907715
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,64,4,128,1,float16,fp8,0,0.18046720027923585
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,64,4,128,1,fp8,fp8,0,0.17907359600067138
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,64,8,128,1,float16,float16,0,0.25054240226745605
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,64,1,128,1,float16,float16,0,0.11993759870529175
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,64,8,128,1,float16,fp8,0,0.17942559719085693
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,64,8,128,1,fp8,fp8,0,0.17918879985809327
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,64,2,128,1,float16,float16,0,0.1208624005317688
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,64,64,128,1,float16,float16,0,0.27349441051483153
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,64,2,128,1,fp8,fp8,0,0.09321119785308837
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,64,64,128,1,float16,fp8,0,0.10995999574661255
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,64,64,128,1,fp8,fp8,0,0.10971039533615112
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,64,1,128,1,float16,fp8,0,0.09322400093078613
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,64,1,128,1,fp8,fp8,0,0.09327359795570374
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,64,8,128,1,float16,fp8,0,0.09299359917640686
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,64,2,128,1,float16,fp8,0,0.09357600212097168
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,64,4,128,1,float16,float16,0,0.12792960405349732
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,64,4,128,1,float16,fp8,0,0.09302880167961121
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,64,4,128,1,fp8,fp8,0,0.09332799911499023
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,64,8,128,1,float16,float16,0,0.1376207947731018
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,64,8,128,1,fp8,fp8,0,0.09328320026397705
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,64,64,128,1,float16,float16,0,0.14476959705352782
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,64,64,128,1,float16,fp8,0,0.058108800649642946
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,64,64,128,1,fp8,fp8,0,0.0582256019115448
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,64,1,128,1,float16,float16,0,0.07106239795684814
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,64,1,128,1,float16,fp8,0,0.05145440101623535
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,64,1,128,1,fp8,fp8,0,0.05145279765129089
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,64,4,128,1,fp8,fp8,0,0.051291197538375854
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,64,2,128,1,float16,float16,0,0.07035359740257263
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,64,2,128,1,float16,fp8,0,0.05135520100593567
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,64,2,128,1,fp8,fp8,0,0.05150560140609741
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,64,4,128,1,float16,float16,0,0.07008000016212464
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,64,4,128,1,float16,fp8,0,0.051571202278137204
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,64,64,128,1,float16,float16,0,0.06998400092124939
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,64,8,128,1,float16,float16,0,0.07580479979515076
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,64,8,128,1,float16,fp8,0,0.05114079713821411
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,64,8,128,1,fp8,fp8,0,0.051451200246810914
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,64,64,128,1,float16,fp8,0,0.033339199423789975
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,64,64,128,1,fp8,fp8,0,0.03317599892616272
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,64,1,128,1,float16,float16,0,0.04498879909515381
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,64,1,128,1,float16,fp8,0,0.030755200982093812
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,64,1,128,1,fp8,fp8,0,0.02999039888381958
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,64,2,128,1,float16,float16,0,0.04495840072631836
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,64,2,128,1,float16,fp8,0,0.030460798740386964
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,64,2,128,1,fp8,fp8,0,0.030792000889778137
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,64,8,128,1,fp8,fp8,0,0.030561599135398864
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,64,4,128,1,float16,float16,0,0.0452239990234375
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,64,64,128,1,float16,float16,0,0.04225760102272034
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,64,4,128,1,float16,fp8,0,0.03022879958152771
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,64,4,128,1,fp8,fp8,0,0.030670401453971863
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,64,8,128,1,float16,float16,0,0.0452208012342453
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,64,8,128,1,float16,fp8,0,0.029950401186943053
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,64,64,128,1,float16,fp8,0,0.02110240012407303
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,64,2,128,1,float16,fp8,0,0.01912959963083267
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,64,64,128,1,fp8,fp8,0,0.020817600190639496
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,64,1,128,1,float16,float16,0,0.03089439868927002
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,64,1,128,1,float16,fp8,0,0.019760000705718993
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,64,1,128,1,fp8,fp8,0,0.01982239931821823
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,64,2,128,1,float16,float16,0,0.030985599756240843
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,64,2,128,1,fp8,fp8,0,0.019886399805545806
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,64,4,128,1,float16,float16,0,0.030883198976516722
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,64,4,128,1,float16,fp8,0,0.018934400379657747
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,64,8,128,1,float16,float16,0,0.03091840147972107
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,64,4,128,1,fp8,fp8,0,0.020179200172424316
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,64,8,128,1,float16,fp8,0,0.020032000541687012
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,64,8,128,1,fp8,fp8,0,0.01895360052585602
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,64,64,128,1,float16,float16,0,0.028832000494003297
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,64,2,128,1,float16,fp8,0,0.01377120018005371
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,64,64,128,1,float16,fp8,0,0.01449120044708252
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,64,64,128,1,fp8,fp8,0,0.01456640064716339
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,64,1,128,1,float16,float16,0,0.02268799990415573
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,64,1,128,1,float16,fp8,0,0.01266240030527115
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,64,1,128,1,fp8,fp8,0,0.012591999769210816
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,64,2,128,1,float16,float16,0,0.022776000201702118
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,64,2,128,1,fp8,fp8,0,0.013583999872207642
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,64,4,128,1,float16,float16,0,0.022759999334812164
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,64,4,128,1,float16,fp8,0,0.012511999905109405
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,64,4,128,1,fp8,fp8,0,0.014452800154685974
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,64,8,128,1,float16,float16,0,0.023284800350666046
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,64,1,128,1,float16,fp8,0,0.01043199971318245
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,64,8,128,1,float16,fp8,0,0.01332319974899292
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,64,8,128,1,fp8,fp8,0,0.012585599720478059
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,64,64,128,1,float16,float16,0,0.02053920030593872
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,64,64,128,1,float16,fp8,0,0.0104592002928257
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,64,64,128,1,fp8,fp8,0,0.010513599961996078
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,64,1,128,1,float16,float16,0,0.018646399676799773
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,64,1,128,1,fp8,fp8,0,0.01037919968366623
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,64,2,128,1,float16,float16,0,0.0188960000872612
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,64,2,128,1,float16,fp8,0,0.010454399883747101
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,64,8,128,1,float16,fp8,0,0.01051200032234192
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,64,2,128,1,fp8,fp8,0,0.010422399640083313
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,64,4,128,1,float16,float16,0,0.018651199340820313
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,64,4,128,1,float16,fp8,0,0.010380800068378448
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,64,4,128,1,fp8,fp8,0,0.010436800122261048
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,64,8,128,1,float16,float16,0,0.02061759978532791
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,64,8,128,1,fp8,fp8,0,0.010460799932479859
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,64,64,128,1,float16,fp8,0,0.010398399829864503
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,64,64,128,1,float16,float16,0,0.018726399540901183
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,64,64,128,1,fp8,fp8,0,0.010406400263309478
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,64,1,128,1,float16,float16,0,0.018680000305175783
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,64,4,128,1,float16,fp8,0,0.010420800000429154
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,64,1,128,1,float16,fp8,0,0.010425599664449692
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,64,1,128,1,fp8,fp8,0,0.008479999750852585
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,64,2,128,1,float16,float16,0,0.01857600063085556
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,64,2,128,1,float16,fp8,0,0.00883520022034645
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,64,2,128,1,fp8,fp8,0,0.010393600165843963
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,64,4,128,1,float16,float16,0,0.018747200071811677
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,64,4,128,1,fp8,fp8,0,0.00910239964723587
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,64,8,128,1,float16,float16,0,0.01868640035390854
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,64,8,128,1,float16,fp8,0,0.010390400141477584
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,64,8,128,1,fp8,fp8,0,0.00902559980750084
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,64,1,128,1,float16,float16,0,0.40414719581604003
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,64,1,128,1,float16,fp8,0,0.33341279029846194
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,64,1,128,1,fp8,fp8,0,0.33292160034179685
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,64,2,128,1,float16,float16,0,0.41123042106628416
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,64,2,128,1,float16,fp8,0,0.33282558917999266
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,64,2,128,1,fp8,fp8,0,0.3351871967315674
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,64,4,128,1,float16,float16,0,0.4205535888671875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,64,4,128,1,float16,fp8,0,0.33318400382995605
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,64,4,128,1,fp8,fp8,0,0.33250401020050047
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,64,8,128,1,float16,float16,0,0.44683518409729006
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,64,8,128,1,float16,fp8,0,0.33271520137786864
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,64,8,128,1,fp8,fp8,0,0.33287041187286376
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,64,64,128,1,float16,fp8,0,0.18811359405517578
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,64,64,128,1,float16,float16,0,0.367743992805481
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,64,64,128,1,fp8,fp8,0,0.18664000034332276
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,64,2,128,1,fp8,fp8,0,0.17124799489974976
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,64,1,128,1,float16,float16,0,0.21803200244903564
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,64,1,128,1,float16,fp8,0,0.16898399591445923
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,64,4,128,1,float16,fp8,0,0.16953279972076415
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,64,1,128,1,fp8,fp8,0,0.16948959827423096
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,64,2,128,1,float16,float16,0,0.21685121059417725
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,64,2,128,1,float16,fp8,0,0.16966880559921266
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,64,4,128,1,float16,float16,0,0.2243488073348999
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,64,4,128,1,fp8,fp8,0,0.1708575963973999
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,64,8,128,1,float16,float16,0,0.2339632034301758
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,64,8,128,1,float16,fp8,0,0.1695520043373108
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,64,1,128,1,float16,fp8,0,0.09057279825210571
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,64,8,128,1,fp8,fp8,0,0.16940000057220458
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,64,64,128,1,float16,float16,0,0.19556000232696533
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,64,64,128,1,float16,fp8,0,0.09684479832649232
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,64,64,128,1,fp8,fp8,0,0.09728320240974427
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,64,1,128,1,float16,float16,0,0.11740800142288207
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,64,1,128,1,fp8,fp8,0,0.08976160287857056
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,64,2,128,1,float16,float16,0,0.11748160123825073
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,64,2,128,1,float16,fp8,0,0.09039040207862854
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,64,8,128,1,float16,float16,0,0.12289919853210449
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,64,2,128,1,fp8,fp8,0,0.08936799764633178
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,64,4,128,1,float16,float16,0,0.11682560443878173
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,64,4,128,1,float16,fp8,0,0.08968639969825745
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,64,4,128,1,fp8,fp8,0,0.08987039923667908
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,64,8,128,1,float16,fp8,0,0.08932319879531861
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,64,64,128,1,float16,float16,0,0.09474560022354125
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,64,8,128,1,fp8,fp8,0,0.09019200205802917
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,64,64,128,1,float16,fp8,0,0.0535968005657196
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,64,64,128,1,fp8,fp8,0,0.05355839729309082
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,64,1,128,1,float16,float16,0,0.06935679912567139
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,64,1,128,1,float16,fp8,0,0.04994400143623352
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,64,1,128,1,fp8,fp8,0,0.04995200037956238
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,64,2,128,1,float16,float16,0,0.07003039717674256
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,64,2,128,1,float16,fp8,0,0.05013279914855957
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,64,2,128,1,fp8,fp8,0,0.049779200553894044
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,64,4,128,1,float16,float16,0,0.06920959949493408
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,64,64,128,1,float16,fp8,0,0.031052801012992858
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,64,4,128,1,float16,fp8,0,0.049711999297142026
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,64,4,128,1,fp8,fp8,0,0.049953600764274596
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,64,8,128,1,float16,float16,0,0.06898720264434814
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,64,8,128,1,float16,fp8,0,0.04989120066165924
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,64,8,128,1,fp8,fp8,0,0.04982880055904389
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,64,64,128,1,float16,float16,0,0.05442399978637695
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,64,1,128,1,float16,float16,0,0.04334239959716797
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,64,64,128,1,fp8,fp8,0,0.030934399366378783
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,64,1,128,1,float16,fp8,0,0.028883200883865357
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,64,1,128,1,fp8,fp8,0,0.029473599791526795
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,64,2,128,1,float16,float16,0,0.04327200055122375
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,64,2,128,1,float16,fp8,0,0.029032000899314882
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,64,2,128,1,fp8,fp8,0,0.02935839891433716
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,64,4,128,1,float16,fp8,0,0.0295199990272522
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,64,4,128,1,float16,float16,0,0.0433135986328125
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,64,4,128,1,fp8,fp8,0,0.02890079915523529
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,64,8,128,1,float16,float16,0,0.043377599120140074
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,64,8,128,1,float16,fp8,0,0.029128000140190125
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,64,8,128,1,fp8,fp8,0,0.02911840081214905
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,64,2,128,1,float16,float16,0,0.029023998975753786
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,64,64,128,1,float16,float16,0,0.03522560000419617
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,64,64,128,1,float16,fp8,0,0.02061759978532791
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,64,64,128,1,fp8,fp8,0,0.020740799605846405
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,64,1,128,1,float16,float16,0,0.02945440113544464
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,64,1,128,1,float16,fp8,0,0.018643200397491455
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,64,1,128,1,fp8,fp8,0,0.018947200477123262
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,64,2,128,1,float16,fp8,0,0.019088000059127808
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,64,2,128,1,fp8,fp8,0,0.0186271995306015
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,64,4,128,1,float16,float16,0,0.03028639853000641
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,64,4,128,1,float16,fp8,0,0.018991999328136444
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,64,4,128,1,fp8,fp8,0,0.018700799345970152
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,64,8,128,1,float16,float16,0,0.03007520139217377
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,64,8,128,1,float16,fp8,0,0.018673600256443025
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,64,8,128,1,fp8,fp8,0,0.018777599930763243
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,64,64,128,1,float16,float16,0,0.023996800184249878
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,64,2,128,1,float16,fp8,0,0.012849600613117218
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,64,64,128,1,float16,fp8,0,0.012999999523162841
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,64,64,128,1,fp8,fp8,0,0.014529600739479065
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,64,1,128,1,float16,float16,0,0.022703999280929567
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,64,4,128,1,fp8,fp8,0,0.012668800354003907
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,64,1,128,1,float16,fp8,0,0.012484800070524216
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,64,1,128,1,fp8,fp8,0,0.012555199861526489
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,64,2,128,1,float16,float16,0,0.022668799757957457
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,64,2,128,1,fp8,fp8,0,0.012457600235939026
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,64,4,128,1,float16,float16,0,0.022779199481010436
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,64,4,128,1,float16,fp8,0,0.01249919980764389
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,64,8,128,1,float16,float16,0,0.022835199534893037
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,64,8,128,1,float16,fp8,0,0.012744000554084778
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,64,8,128,1,fp8,fp8,0,0.012559999525547028
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,64,64,128,1,float16,float16,0,0.01879040002822876
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,64,64,128,1,float16,fp8,0,0.010407999902963639
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,64,64,128,1,fp8,fp8,0,0.010497599840164185
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,64,1,128,1,float16,float16,0,0.018798400461673737
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,64,1,128,1,float16,fp8,0,0.010416000336408614
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,64,1,128,1,fp8,fp8,0,0.010417599976062775
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,64,2,128,1,float16,float16,0,0.018875199556350707
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,64,8,128,1,float16,fp8,0,0.010526400059461594
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,64,2,128,1,float16,fp8,0,0.010363200306892395
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,64,8,128,1,fp8,fp8,0,0.010489600151777268
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,64,2,128,1,fp8,fp8,0,0.010409600287675857
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,64,4,128,1,float16,fp8,0,0.01037440001964569
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,64,4,128,1,float16,float16,0,0.01908479928970337
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,64,4,128,1,fp8,fp8,0,0.010451199859380722
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,64,8,128,1,float16,float16,0,0.01880960017442703
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,64,64,128,1,float16,float16,0,0.01881439983844757
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,64,64,128,1,float16,fp8,0,0.009622400254011154
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,64,64,128,1,fp8,fp8,0,0.009696000069379807
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,64,1,128,1,float16,float16,0,0.018595199286937713
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,64,4,128,1,float16,fp8,0,0.008550400286912918
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,64,1,128,1,float16,fp8,0,0.010395199805498124
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,64,1,128,1,fp8,fp8,0,0.010384000092744827
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,64,2,128,1,float16,float16,0,0.01865759938955307
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,64,2,128,1,float16,fp8,0,0.010425599664449692
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,64,2,128,1,fp8,fp8,0,0.0105103999376297
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,64,4,128,1,float16,float16,0,0.018670399487018586
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,64,4,128,1,fp8,fp8,0,0.008782400190830231
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,64,8,128,1,float16,float16,0,0.018700799345970152
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,64,8,128,1,float16,fp8,0,0.010371199995279311
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,64,8,128,1,fp8,fp8,0,0.01035040020942688
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,48,1,128,1,float16,fp8,0,17.292568969726563
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,48,1,128,1,fp8,fp8,0,17.307196044921874
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,48,2,128,1,float16,fp8,0,17.320535278320314
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,48,2,128,1,fp8,fp8,0,17.362889099121094
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,48,1,128,1,float16,float16,0,21.819419860839844
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,48,2,128,1,float16,float16,0,22.2553955078125
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,48,4,128,1,float16,float16,0,22.470631408691407
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,48,4,128,1,float16,fp8,0,17.29853515625
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,48,48,128,1,float16,fp8,0,8.994731140136718
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,48,48,128,1,fp8,fp8,0,8.92728500366211
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,48,1,128,1,float16,float16,0,11.643294525146484
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,48,4,128,1,fp8,fp8,0,17.699002075195313
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,48,8,128,1,float16,fp8,0,17.757791137695314
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,48,8,128,1,fp8,fp8,0,17.739775085449217
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,48,48,128,1,float16,float16,0,14.316424560546874
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,48,8,128,1,float16,float16,0,23.64081573486328
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,48,1,128,1,float16,fp8,0,8.785910034179688
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,48,1,128,1,fp8,fp8,0,8.640702056884766
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,48,2,128,1,float16,fp8,0,8.848073577880859
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,48,2,128,1,fp8,fp8,0,8.872901153564452
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,48,4,128,1,float16,fp8,0,8.712531280517577
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,48,2,128,1,float16,float16,0,11.165245056152344
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,48,4,128,1,float16,float16,0,11.663162994384766
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,48,48,128,1,float16,fp8,0,4.575497436523437
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,48,4,128,1,fp8,fp8,0,8.72979965209961
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,48,48,128,1,fp8,fp8,0,4.773712158203125
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,48,48,128,1,float16,float16,0,7.19214096069336
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,48,8,128,1,float16,fp8,0,8.845699310302734
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,48,8,128,1,fp8,fp8,0,8.892330932617188
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,48,1,128,1,float16,float16,0,5.739545440673828
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,48,1,128,1,float16,fp8,0,4.434073638916016
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,48,8,128,1,float16,float16,0,11.865446472167969
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,48,1,128,1,fp8,fp8,0,4.346324920654297
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,48,2,128,1,float16,float16,0,5.508361434936523
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,48,2,128,1,fp8,fp8,0,4.339038467407226
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,48,2,128,1,float16,fp8,0,4.4380638122558596
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,48,4,128,1,float16,fp8,0,4.635411071777344
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,48,4,128,1,float16,float16,0,5.5752208709716795
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,48,4,128,1,fp8,fp8,0,4.37592658996582
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,48,8,128,1,float16,fp8,0,4.338683319091797
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,48,48,128,1,float16,fp8,0,2.357088088989258
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,48,48,128,1,float16,float16,0,3.5222705841064452
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,48,8,128,1,float16,float16,0,5.924703979492188
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,48,48,128,1,fp8,fp8,0,2.291697692871094
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,48,1,128,1,float16,float16,0,2.7021615982055662
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,48,1,128,1,float16,fp8,0,2.228158378601074
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,48,8,128,1,fp8,fp8,0,4.741372680664062
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,48,1,128,1,fp8,fp8,0,2.21978874206543
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,48,2,128,1,float16,float16,0,2.6177759170532227
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,48,2,128,1,float16,fp8,0,2.4283519744873048
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,48,2,128,1,fp8,fp8,0,2.732111930847168
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,48,4,128,1,float16,float16,0,2.7416624069213866
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,48,4,128,1,fp8,fp8,0,2.222281646728516
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,48,8,128,1,float16,fp8,0,2.210643196105957
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,48,4,128,1,float16,fp8,0,2.834089660644531
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,48,8,128,1,float16,float16,0,2.7753087997436525
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,48,8,128,1,fp8,fp8,0,2.794771194458008
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,48,1,128,1,float16,fp8,0,10.001980590820313
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,48,1,128,1,fp8,fp8,0,10.166737365722657
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,48,2,128,1,float16,fp8,0,10.214514923095702
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,48,2,128,1,fp8,fp8,0,10.076670074462891
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,48,1,128,1,float16,float16,0,12.670033264160157
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,48,4,128,1,float16,fp8,0,10.048451232910157
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,48,2,128,1,float16,float16,0,12.944668579101563
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,48,4,128,1,float16,float16,0,13.216499328613281
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,48,48,128,1,float16,fp8,0,5.2871856689453125
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,48,48,128,1,fp8,fp8,0,5.650263977050781
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,48,4,128,1,fp8,fp8,0,10.245062255859375
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,48,1,128,1,float16,float16,0,6.4229484558105465
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,48,48,128,1,float16,float16,0,8.87024154663086
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,48,8,128,1,float16,fp8,0,10.27890396118164
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,48,8,128,1,fp8,fp8,0,10.351225280761719
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,48,1,128,1,float16,fp8,0,5.131076812744141
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,48,8,128,1,float16,float16,0,13.768479919433593
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,48,1,128,1,fp8,fp8,0,4.976704025268555
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,48,2,128,1,float16,fp8,0,5.3729248046875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,48,2,128,1,fp8,fp8,0,5.009326553344726
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,48,4,128,1,float16,fp8,0,5.083879852294922
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,48,2,128,1,float16,float16,0,6.571036529541016
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,48,4,128,1,float16,float16,0,6.539854431152344
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,48,4,128,1,fp8,fp8,0,5.110766220092773
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,48,8,128,1,float16,fp8,0,5.059220886230468
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,48,48,128,1,float16,fp8,0,2.7212400436401367
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,48,48,128,1,fp8,fp8,0,2.913777542114258
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,48,1,128,1,float16,fp8,0,2.543395233154297
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,48,48,128,1,float16,float16,0,4.325723266601562
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,48,1,128,1,float16,float16,0,3.1014047622680665
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,48,8,128,1,float16,float16,0,6.8889617919921875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,48,8,128,1,fp8,fp8,0,5.022401428222656
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,48,1,128,1,fp8,fp8,0,2.547230339050293
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,48,2,128,1,float16,fp8,0,2.548102378845215
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,48,2,128,1,float16,float16,0,3.1611455917358398
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,48,2,128,1,fp8,fp8,0,2.5410879135131834
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,48,4,128,1,float16,fp8,0,2.796703910827637
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,48,4,128,1,float16,float16,0,2.9915632247924804
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,48,4,128,1,fp8,fp8,0,2.758564758300781
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,48,8,128,1,float16,float16,0,3.228353500366211
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,48,8,128,1,float16,fp8,0,2.580731201171875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,48,48,128,1,float16,fp8,0,1.374783992767334
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,48,8,128,1,fp8,fp8,0,2.5337808609008787
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,48,48,128,1,float16,float16,0,2.4883615493774416
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,48,48,128,1,fp8,fp8,0,1.4530672073364257
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,48,1,128,1,float16,float16,0,1.7509920120239257
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,48,1,128,1,float16,fp8,0,1.5888416290283203
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,48,1,128,1,fp8,fp8,0,1.3149104118347168
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,48,2,128,1,float16,float16,0,1.5069919586181642
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,48,2,128,1,float16,fp8,0,1.317460823059082
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,48,2,128,1,fp8,fp8,0,1.4634127616882324
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,48,4,128,1,float16,float16,0,1.5246159553527832
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,48,4,128,1,float16,fp8,0,1.5715744018554687
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,48,4,128,1,fp8,fp8,0,1.421731185913086
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,48,8,128,1,float16,float16,0,1.5963215827941895
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,48,8,128,1,float16,fp8,0,1.5961647987365724
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,48,8,128,1,fp8,fp8,0,1.2917296409606933
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,48,1,128,1,float16,fp8,0,7.189403533935547
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,48,1,128,1,fp8,fp8,0,7.091515350341797
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,48,2,128,1,fp8,fp8,0,7.190704345703125
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,48,2,128,1,float16,fp8,0,7.521198272705078
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,48,1,128,1,float16,float16,0,8.865563201904298
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,48,2,128,1,float16,float16,0,9.190943908691406
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,48,4,128,1,float16,fp8,0,7.0500732421875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,48,4,128,1,float16,float16,0,9.29843521118164
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,48,48,128,1,fp8,fp8,0,3.7771934509277343
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,48,48,128,1,float16,fp8,0,4.412579345703125
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,48,4,128,1,fp8,fp8,0,7.178913879394531
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,48,1,128,1,float16,float16,0,4.508044815063476
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,48,48,128,1,float16,float16,0,6.4996482849121096
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,48,8,128,1,float16,fp8,0,7.197254180908203
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,48,8,128,1,fp8,fp8,0,7.197286224365234
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,48,8,128,1,float16,float16,0,9.567034912109374
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,48,1,128,1,float16,fp8,0,3.498118209838867
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,48,1,128,1,fp8,fp8,0,3.5216625213623045
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,48,2,128,1,float16,fp8,0,3.7676944732666016
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,48,2,128,1,float16,float16,0,4.469144058227539
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,48,2,128,1,fp8,fp8,0,3.546963119506836
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,48,4,128,1,float16,fp8,0,3.5794910430908202
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,48,4,128,1,float16,float16,0,4.6033073425292965
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,48,4,128,1,fp8,fp8,0,3.774995040893555
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,48,48,128,1,float16,fp8,0,1.9157920837402345
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,48,48,128,1,fp8,fp8,0,1.9347248077392578
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,48,8,128,1,float16,fp8,0,3.581492614746094
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,48,8,128,1,float16,float16,0,4.886201477050781
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,48,8,128,1,fp8,fp8,0,3.5719345092773436
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,48,1,128,1,float16,float16,0,2.443822479248047
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,48,48,128,1,float16,float16,0,3.6692657470703125
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,48,1,128,1,float16,fp8,0,1.7799983978271485
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,48,1,128,1,fp8,fp8,0,1.7774368286132813
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,48,2,128,1,float16,fp8,0,1.8213024139404297
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,48,2,128,1,float16,float16,0,2.1645391464233397
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,48,2,128,1,fp8,fp8,0,2.275060844421387
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,48,4,128,1,float16,float16,0,2.1383840560913088
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,48,4,128,1,fp8,fp8,0,1.8455232620239257
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,48,4,128,1,float16,fp8,0,2.134027290344238
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,48,8,128,1,float16,fp8,0,1.7849744796752929
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,48,8,128,1,fp8,fp8,0,1.7812047958374024
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,48,8,128,1,float16,float16,0,2.297054481506348
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,48,48,128,1,float16,float16,0,1.9832271575927733
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,48,48,128,1,float16,fp8,0,1.1024736404418944
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,48,48,128,1,fp8,fp8,0,1.1970895767211913
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,48,1,128,1,float16,float16,0,1.0589615821838378
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,48,1,128,1,float16,fp8,0,1.0578415870666504
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,48,1,128,1,fp8,fp8,0,0.9282480239868164
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,48,2,128,1,float16,float16,0,1.0759039878845216
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,48,2,128,1,float16,fp8,0,1.166651153564453
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,48,2,128,1,fp8,fp8,0,1.1744175910949708
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,48,4,128,1,float16,float16,0,1.101700782775879
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,48,4,128,1,float16,fp8,0,1.037440013885498
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,48,4,128,1,fp8,fp8,0,1.007436752319336
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,48,8,128,1,float16,fp8,0,0.9237071990966796
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,48,8,128,1,float16,float16,0,1.154201602935791
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,48,8,128,1,fp8,fp8,0,0.9476048469543457
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,48,1,128,1,float16,fp8,0,9.25353775024414
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,48,1,128,1,fp8,fp8,0,9.275183868408202
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,48,2,128,1,float16,fp8,0,9.28421630859375
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,48,2,128,1,fp8,fp8,0,9.401725006103515
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,48,4,128,1,float16,fp8,0,9.4173583984375
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,48,1,128,1,float16,float16,0,11.541366577148438
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,48,2,128,1,float16,float16,0,12.030531311035157
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,48,4,128,1,float16,float16,0,12.098200225830078
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,48,48,128,1,float16,fp8,0,5.0797889709472654
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,48,48,128,1,fp8,fp8,0,5.1249137878417965
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,48,1,128,1,float16,float16,0,5.975470352172851
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,48,4,128,1,fp8,fp8,0,9.625390625
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,48,8,128,1,float16,fp8,0,9.59930419921875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,48,48,128,1,float16,float16,0,9.151806640625
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,48,8,128,1,fp8,fp8,0,9.718692779541016
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,48,1,128,1,float16,fp8,0,4.6592529296875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,48,8,128,1,float16,float16,0,13.103622436523438
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,48,1,128,1,fp8,fp8,0,4.688179016113281
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,48,2,128,1,float16,fp8,0,4.796942520141601
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,48,2,128,1,fp8,fp8,0,4.700934219360351
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,48,4,128,1,float16,fp8,0,4.740016174316406
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,48,2,128,1,float16,float16,0,6.1246990203857425
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,48,4,128,1,float16,float16,0,6.090163040161133
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,48,4,128,1,fp8,fp8,0,4.688415908813477
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,48,8,128,1,float16,fp8,0,4.682539367675782
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,48,48,128,1,float16,fp8,0,2.5250783920288087
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,48,48,128,1,fp8,fp8,0,2.706056022644043
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,48,8,128,1,fp8,fp8,0,4.7110145568847654
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,48,1,128,1,float16,float16,0,2.8489599227905273
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,48,8,128,1,float16,float16,0,6.436991882324219
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,48,48,128,1,float16,float16,0,4.554705429077148
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,48,1,128,1,float16,fp8,0,2.3789520263671875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,48,1,128,1,fp8,fp8,0,2.3731056213378907
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,48,2,128,1,float16,float16,0,2.754318428039551
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,48,2,128,1,float16,fp8,0,2.348614311218262
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,48,4,128,1,float16,fp8,0,2.3531776428222657
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,48,4,128,1,fp8,fp8,0,2.314401626586914
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,48,2,128,1,fp8,fp8,0,2.6744735717773436
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,48,4,128,1,float16,float16,0,2.920552062988281
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,48,8,128,1,float16,float16,0,3.092497634887695
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,48,48,128,1,float16,fp8,0,1.2666864395141602
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,48,48,128,1,fp8,fp8,0,1.2699199676513673
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,48,8,128,1,fp8,fp8,0,2.281252861022949
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,48,48,128,1,float16,float16,0,2.294691276550293
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,48,1,128,1,float16,float16,0,1.3659199714660644
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,48,8,128,1,float16,fp8,0,2.803803253173828
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,48,1,128,1,float16,fp8,0,1.3804143905639648
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,48,2,128,1,float16,fp8,0,1.1803343772888184
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,48,2,128,1,float16,float16,0,1.3825743675231934
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,48,1,128,1,fp8,fp8,0,1.4767663955688477
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,48,2,128,1,fp8,fp8,0,1.1995648384094237
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,48,4,128,1,float16,float16,0,1.4244112014770507
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,48,4,128,1,float16,fp8,0,1.337707233428955
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,48,4,128,1,fp8,fp8,0,1.2192815780639648
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,48,8,128,1,float16,float16,0,1.502742385864258
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,48,48,128,1,float16,fp8,0,0.672544002532959
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,48,8,128,1,float16,fp8,0,1.1738256454467773
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,48,48,128,1,fp8,fp8,0,0.7178959846496582
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,48,1,128,1,float16,float16,0,0.7219056129455567
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,48,8,128,1,fp8,fp8,0,1.3429696083068847
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,48,48,128,1,float16,float16,0,1.2581040382385253
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,48,1,128,1,float16,fp8,0,0.6292975902557373
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,48,1,128,1,fp8,fp8,0,0.6925024032592774
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,48,2,128,1,float16,fp8,0,0.6213664054870606
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,48,2,128,1,float16,float16,0,0.720959997177124
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,48,4,128,1,float16,float16,0,0.7528480052947998
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,48,2,128,1,fp8,fp8,0,0.6496240139007569
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,48,4,128,1,float16,fp8,0,0.7156799793243408
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,48,4,128,1,fp8,fp8,0,0.6544688224792481
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,48,8,128,1,float16,float16,0,0.7798160076141357
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,48,8,128,1,float16,fp8,0,0.6947360038757324
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,48,8,128,1,fp8,fp8,0,0.621779203414917
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,48,1,128,1,fp8,fp8,0,5.368822479248047
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,48,1,128,1,float16,fp8,0,5.438260650634765
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,48,2,128,1,float16,fp8,0,5.368507385253906
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,48,2,128,1,fp8,fp8,0,5.437516784667968
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,48,4,128,1,float16,fp8,0,5.3879646301269535
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,48,1,128,1,float16,float16,0,6.748487854003907
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,48,2,128,1,float16,float16,0,6.824002838134765
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,48,4,128,1,float16,float16,0,7.035088348388672
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,48,48,128,1,float16,fp8,0,3.082921600341797
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,48,48,128,1,fp8,fp8,0,3.5929664611816405
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,48,1,128,1,float16,float16,0,3.3382720947265625
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,48,4,128,1,fp8,fp8,0,5.511649703979492
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,48,8,128,1,float16,fp8,0,5.50335693359375
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,48,8,128,1,fp8,fp8,0,5.474894332885742
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,48,48,128,1,float16,float16,0,5.850638580322266
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,48,1,128,1,float16,fp8,0,2.7295488357543944
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,48,8,128,1,float16,float16,0,7.600526428222656
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,48,1,128,1,fp8,fp8,0,2.7479568481445313
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,48,2,128,1,float16,fp8,0,2.7495855331420898
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,48,2,128,1,fp8,fp8,0,2.7418544769287108
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,48,2,128,1,float16,float16,0,3.41369743347168
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,48,4,128,1,float16,fp8,0,2.6770463943481446
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,48,4,128,1,float16,float16,0,3.4762832641601564
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,48,4,128,1,fp8,fp8,0,2.7293519973754883
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,48,8,128,1,float16,fp8,0,2.7096256256103515
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,48,48,128,1,float16,fp8,0,1.928544044494629
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,48,48,128,1,fp8,fp8,0,1.4936112403869628
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,48,8,128,1,float16,float16,0,3.858491134643555
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,48,8,128,1,fp8,fp8,0,2.758483123779297
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,48,1,128,1,float16,float16,0,1.8365535736083984
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,48,1,128,1,float16,fp8,0,1.439583969116211
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,48,48,128,1,float16,float16,0,2.9757551193237304
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,48,1,128,1,fp8,fp8,0,1.4264703750610352
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,48,2,128,1,float16,fp8,0,1.373528003692627
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,48,2,128,1,float16,float16,0,1.6556207656860351
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,48,2,128,1,fp8,fp8,0,1.6844976425170899
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,48,4,128,1,fp8,fp8,0,1.3570896148681642
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,48,4,128,1,float16,float16,0,1.6649360656738281
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,48,4,128,1,float16,fp8,0,1.710260772705078
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,48,8,128,1,float16,float16,0,1.7859151840209961
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,48,8,128,1,float16,fp8,0,1.3662896156311035
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,48,8,128,1,fp8,fp8,0,1.3640671730041505
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,48,48,128,1,float16,fp8,0,0.8587247848510742
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,48,48,128,1,fp8,fp8,0,0.7794847965240479
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,48,1,128,1,float16,float16,0,0.8409328460693359
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,48,1,128,1,float16,fp8,0,0.7545728206634521
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,48,48,128,1,float16,float16,0,1.6743471145629882
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,48,1,128,1,fp8,fp8,0,0.8309103965759277
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,48,2,128,1,float16,float16,0,0.8215359687805176
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,48,2,128,1,float16,fp8,0,0.7093920230865478
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,48,2,128,1,fp8,fp8,0,0.7841887950897217
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,48,4,128,1,float16,float16,0,0.8526687622070312
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,48,4,128,1,float16,fp8,0,0.7772160053253174
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,48,4,128,1,fp8,fp8,0,0.7250832080841064
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,48,8,128,1,float16,fp8,0,0.7956975936889649
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,48,8,128,1,float16,float16,0,0.9809103965759277
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,48,48,128,1,float16,fp8,0,0.454420804977417
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,48,8,128,1,fp8,fp8,0,0.8293456077575684
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,48,48,128,1,fp8,fp8,0,0.44256157875061036
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,48,48,128,1,float16,float16,0,0.7771279811859131
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,48,1,128,1,float16,float16,0,0.43394079208374026
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,48,1,128,1,float16,fp8,0,0.4109183788299561
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,48,1,128,1,fp8,fp8,0,0.4391520023345947
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,48,2,128,1,float16,float16,0,0.43959522247314453
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,48,2,128,1,float16,fp8,0,0.45082879066467285
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,48,2,128,1,fp8,fp8,0,0.3940704107284546
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,48,4,128,1,float16,float16,0,0.4511280059814453
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,48,4,128,1,float16,fp8,0,0.3828927993774414
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,48,4,128,1,fp8,fp8,0,0.3842639923095703
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,48,8,128,1,fp8,fp8,0,0.412278413772583
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,48,8,128,1,float16,float16,0,0.5171504020690918
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,48,8,128,1,float16,fp8,0,0.38159201145172117
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,48,1,128,1,float16,fp8,0,5.155131149291992
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,48,1,128,1,fp8,fp8,0,5.1315967559814455
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,48,2,128,1,float16,fp8,0,5.138505554199218
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,48,2,128,1,fp8,fp8,0,5.125788879394531
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,48,1,128,1,float16,float16,0,6.275211334228516
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,48,2,128,1,float16,float16,0,6.493377685546875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,48,4,128,1,float16,float16,0,6.7901123046875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,48,4,128,1,float16,fp8,0,5.139590454101563
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,48,4,128,1,fp8,fp8,0,5.160992050170899
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,48,48,128,1,float16,fp8,0,3.54730224609375
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,48,8,128,1,float16,fp8,0,5.211980819702148
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,48,48,128,1,fp8,fp8,0,3.3462593078613283
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,48,8,128,1,fp8,fp8,0,5.226489639282226
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,48,1,128,1,float16,float16,0,3.062376022338867
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,48,8,128,1,float16,float16,0,7.431084442138672
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,48,48,128,1,float16,float16,0,6.853568267822266
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,48,1,128,1,float16,fp8,0,2.574817657470703
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,48,1,128,1,fp8,fp8,0,2.561199951171875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,48,2,128,1,float16,fp8,0,2.5640960693359376
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,48,2,128,1,fp8,fp8,0,2.591991996765137
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,48,2,128,1,float16,float16,0,3.196513557434082
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,48,4,128,1,float16,fp8,0,2.8698415756225586
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,48,4,128,1,fp8,fp8,0,2.554395294189453
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,48,8,128,1,float16,fp8,0,2.5784944534301757
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,48,48,128,1,float16,fp8,0,1.7903743743896485
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,48,4,128,1,float16,float16,0,3.4633102416992188
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,48,8,128,1,fp8,fp8,0,2.5905920028686524
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,48,8,128,1,float16,float16,0,3.6720481872558595
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,48,48,128,1,float16,float16,0,3.326464080810547
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,48,1,128,1,float16,fp8,0,1.345635223388672
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,48,1,128,1,float16,float16,0,1.4928863525390625
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,48,48,128,1,fp8,fp8,0,1.5148096084594727
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,48,2,128,1,float16,fp8,0,1.3257391929626465
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,48,1,128,1,fp8,fp8,0,1.363036823272705
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,48,2,128,1,float16,float16,0,1.6334335327148437
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,48,2,128,1,fp8,fp8,0,1.5102784156799316
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,48,4,128,1,float16,fp8,0,1.304316806793213
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,48,4,128,1,float16,float16,0,1.6428112030029296
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,48,8,128,1,float16,fp8,0,1.3049471855163575
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,48,4,128,1,fp8,fp8,0,1.577017593383789
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,48,8,128,1,fp8,fp8,0,1.3246159553527832
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,48,48,128,1,float16,fp8,0,0.8246928215026855
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,48,8,128,1,float16,float16,0,1.7806896209716796
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,48,48,128,1,fp8,fp8,0,0.7580543994903565
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,48,1,128,1,float16,float16,0,0.8490511894226074
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,48,48,128,1,float16,float16,0,1.7201360702514648
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,48,1,128,1,float16,fp8,0,0.677345609664917
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,48,1,128,1,fp8,fp8,0,0.7325984001159668
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,48,2,128,1,float16,float16,0,0.7747007846832276
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,48,2,128,1,float16,fp8,0,0.7011856079101563
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,48,2,128,1,fp8,fp8,0,0.7722432136535644
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,48,4,128,1,float16,float16,0,0.8211440086364746
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,48,4,128,1,float16,fp8,0,0.7536399841308594
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,48,4,128,1,fp8,fp8,0,0.6803055763244629
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,48,8,128,1,float16,fp8,0,0.6716559886932373
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,48,8,128,1,float16,float16,0,0.8975760459899902
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,48,8,128,1,fp8,fp8,0,0.7431183815002441
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,48,48,128,1,float16,fp8,0,0.447983980178833
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,48,48,128,1,fp8,fp8,0,0.4818751811981201
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,48,1,128,1,float16,fp8,0,0.3573712110519409
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,48,48,128,1,float16,float16,0,0.8646335601806641
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,48,2,128,1,float16,fp8,0,0.38952319622039794
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,48,1,128,1,float16,float16,0,0.4313471794128418
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,48,1,128,1,fp8,fp8,0,0.35708959102630616
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,48,2,128,1,float16,float16,0,0.41685919761657714
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,48,4,128,1,fp8,fp8,0,0.35523200035095215
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,48,2,128,1,fp8,fp8,0,0.38012640476226806
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,48,4,128,1,float16,float16,0,0.4967967987060547
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,48,4,128,1,float16,fp8,0,0.3557888031005859
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,48,8,128,1,float16,float16,0,0.4790527820587158
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,48,8,128,1,float16,fp8,0,0.36487679481506347
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,48,8,128,1,fp8,fp8,0,0.3567039966583252
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,48,48,128,1,float16,fp8,0,0.22196319103240966
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,48,48,128,1,float16,float16,0,0.46216797828674316
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,48,48,128,1,fp8,fp8,0,0.2242959976196289
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,48,1,128,1,float16,float16,0,0.23827199935913085
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,48,1,128,1,float16,fp8,0,0.19750880002975463
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,48,1,128,1,fp8,fp8,0,0.20192320346832277
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,48,2,128,1,float16,float16,0,0.2284912109375
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,48,2,128,1,float16,fp8,0,0.19933439493179322
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,48,2,128,1,fp8,fp8,0,0.19755680561065675
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,48,4,128,1,float16,float16,0,0.23908159732818604
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,48,4,128,1,float16,fp8,0,0.19809600114822387
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,48,4,128,1,fp8,fp8,0,0.2055903911590576
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,48,8,128,1,float16,float16,0,0.25868639945983884
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,48,8,128,1,float16,fp8,0,0.19763200283050536
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,48,8,128,1,fp8,fp8,0,0.19941760301589967
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,48,1,128,1,float16,fp8,0,3.0856319427490235
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,48,1,128,1,fp8,fp8,0,3.0753408432006837
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,48,1,128,1,float16,float16,0,3.6947376251220705
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,48,2,128,1,float16,fp8,0,3.0980831146240235
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,48,2,128,1,fp8,fp8,0,3.0814912796020506
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,48,2,128,1,float16,float16,0,3.800038528442383
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,48,4,128,1,float16,float16,0,4.01521110534668
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,48,4,128,1,float16,fp8,0,3.088377571105957
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,48,4,128,1,fp8,fp8,0,3.0941232681274413
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,48,8,128,1,float16,fp8,0,3.0955568313598634
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,48,8,128,1,fp8,fp8,0,3.1426111221313477
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,48,48,128,1,float16,fp8,0,2.1049055099487304
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,48,48,128,1,fp8,fp8,0,1.8208192825317382
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,48,8,128,1,float16,float16,0,4.835670471191406
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,48,1,128,1,float16,fp8,0,1.5619359970092774
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,48,1,128,1,float16,float16,0,1.7709663391113282
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,48,1,128,1,fp8,fp8,0,1.5856656074523925
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,48,2,128,1,float16,fp8,0,1.570905590057373
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,48,2,128,1,float16,float16,0,1.8226303100585937
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,48,48,128,1,float16,float16,0,5.05462875366211
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,48,2,128,1,fp8,fp8,0,1.5530863761901856
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,48,4,128,1,float16,fp8,0,1.5547743797302247
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,48,4,128,1,fp8,fp8,0,1.717451286315918
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,48,4,128,1,float16,float16,0,1.9999120712280274
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,48,8,128,1,float16,fp8,0,1.555777645111084
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,48,48,128,1,float16,fp8,0,0.924835205078125
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,48,8,128,1,float16,float16,0,2.1874271392822267
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,48,8,128,1,fp8,fp8,0,1.6920415878295898
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,48,48,128,1,fp8,fp8,0,0.9258288383483887
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,48,1,128,1,float16,float16,0,0.9054736137390137
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,48,1,128,1,float16,fp8,0,0.9026127815246582
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,48,48,128,1,float16,float16,0,2.280950355529785
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,48,1,128,1,fp8,fp8,0,0.8006719589233399
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,48,2,128,1,float16,fp8,0,0.8538687705993653
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,48,2,128,1,float16,float16,0,0.9230912208557129
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,48,2,128,1,fp8,fp8,0,0.81899995803833
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,48,4,128,1,float16,float16,0,0.9876848220825195
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,48,4,128,1,float16,fp8,0,0.8301919937133789
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,48,4,128,1,fp8,fp8,0,0.8602560043334961
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,48,8,128,1,float16,fp8,0,0.7963727951049805
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,48,8,128,1,float16,float16,0,1.1100031852722168
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,48,48,128,1,fp8,fp8,0,0.5386415958404541
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,48,1,128,1,float16,fp8,0,0.4386415958404541
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,48,8,128,1,fp8,fp8,0,0.8317071914672851
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,48,1,128,1,float16,float16,0,0.4726367950439453
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,48,48,128,1,float16,fp8,0,0.48152961730957033
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,48,48,128,1,float16,float16,0,1.216107177734375
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,48,1,128,1,fp8,fp8,0,0.4139264106750488
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,48,4,128,1,float16,float16,0,0.5108143806457519
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,48,2,128,1,float16,float16,0,0.484446382522583
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,48,2,128,1,float16,fp8,0,0.4636256217956543
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,48,2,128,1,fp8,fp8,0,0.41517438888549807
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,48,4,128,1,float16,fp8,0,0.41448321342468264
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,48,4,128,1,fp8,fp8,0,0.4136191844940186
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,48,8,128,1,float16,float16,0,0.5743775844573975
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,48,8,128,1,float16,fp8,0,0.41427040100097656
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,48,8,128,1,fp8,fp8,0,0.41627840995788573
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,48,48,128,1,float16,fp8,0,0.2732800006866455
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,48,48,128,1,fp8,fp8,0,0.25917439460754393
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,48,48,128,1,float16,float16,0,0.5995327949523925
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,48,1,128,1,float16,float16,0,0.25579679012298584
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,48,1,128,1,float16,fp8,0,0.2238464117050171
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,48,1,128,1,fp8,fp8,0,0.22454559803009033
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,48,2,128,1,float16,float16,0,0.2622944116592407
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,48,2,128,1,float16,fp8,0,0.22376959323883056
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,48,2,128,1,fp8,fp8,0,0.22430880069732667
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,48,4,128,1,float16,float16,0,0.2769968032836914
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,48,4,128,1,float16,fp8,0,0.224019193649292
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,48,4,128,1,fp8,fp8,0,0.22360639572143554
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,48,48,128,1,fp8,fp8,0,0.147324800491333
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,48,8,128,1,float16,float16,0,0.3034591913223267
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,48,8,128,1,float16,fp8,0,0.2243056058883667
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,48,8,128,1,fp8,fp8,0,0.2245136022567749
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,48,48,128,1,float16,float16,0,0.32132959365844727
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,48,48,128,1,float16,fp8,0,0.14682719707489014
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,48,1,128,1,float16,float16,0,0.14336639642715454
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,48,1,128,1,float16,fp8,0,0.12841440439224244
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,48,1,128,1,fp8,fp8,0,0.12963520288467406
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,48,2,128,1,float16,float16,0,0.14819999933242797
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,48,2,128,1,float16,fp8,0,0.12829439640045165
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,48,2,128,1,fp8,fp8,0,0.12819039821624756
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,48,4,128,1,float16,float16,0,0.155350399017334
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,48,4,128,1,float16,fp8,0,0.1285871982574463
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,48,4,128,1,fp8,fp8,0,0.12816640138626098
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,48,8,128,1,float16,float16,0,0.1720047950744629
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,48,8,128,1,float16,fp8,0,0.12960000038146974
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,48,8,128,1,fp8,fp8,0,0.12824480533599852
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,48,1,128,1,float16,fp8,0,3.1150224685668944
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,48,1,128,1,fp8,fp8,0,3.110113525390625
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,48,1,128,1,float16,float16,0,3.688062286376953
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,48,2,128,1,float16,fp8,0,3.1121856689453127
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,48,2,128,1,fp8,fp8,0,3.106657600402832
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,48,2,128,1,float16,float16,0,3.903211212158203
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,48,4,128,1,float16,fp8,0,3.113630485534668
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,48,4,128,1,float16,float16,0,4.099526214599609
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,48,4,128,1,fp8,fp8,0,3.1172672271728517
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,48,48,128,1,float16,fp8,0,2.077833557128906
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,48,8,128,1,fp8,fp8,0,3.1467552185058594
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,48,8,128,1,float16,fp8,0,3.3615726470947265
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,48,48,128,1,fp8,fp8,0,2.1405311584472657
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,48,8,128,1,float16,float16,0,4.735919952392578
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,48,1,128,1,float16,fp8,0,1.5921119689941405
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,48,1,128,1,float16,float16,0,1.7953504562377929
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,48,1,128,1,fp8,fp8,0,1.5969663619995118
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,48,2,128,1,float16,fp8,0,1.5768943786621095
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,48,48,128,1,float16,float16,0,5.421169662475586
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,48,2,128,1,float16,float16,0,1.8516176223754883
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,48,2,128,1,fp8,fp8,0,1.7529920578002929
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,48,4,128,1,float16,fp8,0,1.8471376419067382
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,48,4,128,1,float16,float16,0,2.0400495529174805
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,48,4,128,1,fp8,fp8,0,1.57052640914917
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,48,8,128,1,float16,fp8,0,1.5685152053833007
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,48,48,128,1,float16,fp8,0,0.9736063957214356
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,48,8,128,1,fp8,fp8,0,1.7437311172485352
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,48,8,128,1,float16,float16,0,2.338161659240723
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,48,1,128,1,float16,fp8,0,0.8000288009643555
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,48,1,128,1,float16,float16,0,0.9734399795532227
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,48,48,128,1,fp8,fp8,0,0.9948575973510743
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,48,1,128,1,fp8,fp8,0,0.7968143939971923
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,48,2,128,1,float16,float16,0,0.9240223884582519
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,48,2,128,1,float16,fp8,0,0.8113072395324707
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,48,48,128,1,float16,float16,0,2.74193115234375
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,48,2,128,1,fp8,fp8,0,0.8335696220397949
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,48,4,128,1,float16,float16,0,1.0144767761230469
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,48,4,128,1,float16,fp8,0,0.8402480125427246
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,48,4,128,1,fp8,fp8,0,0.799886417388916
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,48,8,128,1,float16,fp8,0,0.8141152381896972
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,48,8,128,1,float16,float16,0,1.1705568313598633
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,48,8,128,1,fp8,fp8,0,0.8015952110290527
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,48,48,128,1,float16,fp8,0,0.5096176147460938
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,48,48,128,1,fp8,fp8,0,0.4993872165679932
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,48,1,128,1,float16,float16,0,0.4596223831176758
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,48,1,128,1,float16,fp8,0,0.41165599822998045
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,48,1,128,1,fp8,fp8,0,0.4114272117614746
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,48,2,128,1,float16,float16,0,0.4785632133483887
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,48,2,128,1,float16,fp8,0,0.4131008148193359
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,48,4,128,1,float16,fp8,0,0.4132559776306152
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,48,48,128,1,float16,float16,0,1.3791584014892577
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,48,2,128,1,fp8,fp8,0,0.4131311893463135
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,48,4,128,1,float16,float16,0,0.518939208984375
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,48,4,128,1,fp8,fp8,0,0.411732816696167
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,48,8,128,1,float16,fp8,0,0.4121712207794189
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,48,8,128,1,float16,float16,0,0.5952144145965577
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,48,8,128,1,fp8,fp8,0,0.4119408130645752
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,48,48,128,1,float16,fp8,0,0.2628448009490967
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,48,48,128,1,fp8,fp8,0,0.26438720226287843
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,48,1,128,1,float16,float16,0,0.24798879623413086
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,48,48,128,1,float16,float16,0,0.7077712059020996
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,48,1,128,1,float16,fp8,0,0.2191551923751831
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,48,4,128,1,float16,fp8,0,0.21892158985137938
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,48,1,128,1,fp8,fp8,0,0.2188512086868286
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,48,2,128,1,float16,float16,0,0.25531840324401855
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,48,2,128,1,float16,fp8,0,0.21905438899993895
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,48,2,128,1,fp8,fp8,0,0.21875998973846436
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,48,4,128,1,float16,float16,0,0.27859039306640626
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,48,4,128,1,fp8,fp8,0,0.21935200691223145
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,48,48,128,1,fp8,fp8,0,0.14541120529174806
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,48,8,128,1,float16,float16,0,0.3162992000579834
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,48,8,128,1,float16,fp8,0,0.21963679790496826
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,48,8,128,1,fp8,fp8,0,0.2194751977920532
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,48,48,128,1,float16,fp8,0,0.1451024055480957
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,48,2,128,1,float16,fp8,0,0.12166399955749511
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,48,48,128,1,float16,float16,0,0.3734832048416138
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,48,1,128,1,float16,float16,0,0.14390239715576172
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,48,1,128,1,fp8,fp8,0,0.1221168041229248
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,48,1,128,1,float16,fp8,0,0.12225279808044434
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,48,2,128,1,float16,float16,0,0.1464143991470337
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,48,2,128,1,fp8,fp8,0,0.12198879718780517
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,48,4,128,1,float16,float16,0,0.15728479623794556
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,48,48,128,1,float16,float16,0,0.20612480640411376
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,48,4,128,1,float16,fp8,0,0.12211359739303589
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,48,48,128,1,fp8,fp8,0,0.08482559919357299
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,48,4,128,1,fp8,fp8,0,0.12256159782409667
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,48,8,128,1,float16,float16,0,0.18024959564208984
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,48,8,128,1,float16,fp8,0,0.12225760221481323
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,48,8,128,1,fp8,fp8,0,0.12207520008087158
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,48,48,128,1,float16,fp8,0,0.08535199761390685
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,48,1,128,1,float16,float16,0,0.08759199976921081
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,48,1,128,1,float16,fp8,0,0.07392479777336121
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,48,1,128,1,fp8,fp8,0,0.07388319969177246
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,48,2,128,1,float16,float16,0,0.08749439716339111
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,48,2,128,1,float16,fp8,0,0.07428479790687562
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,48,2,128,1,fp8,fp8,0,0.07417280077934266
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,48,4,128,1,float16,float16,0,0.09293280243873596
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,48,4,128,1,float16,fp8,0,0.07396960258483887
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,48,4,128,1,fp8,fp8,0,0.07385119795799255
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,48,8,128,1,float16,float16,0,0.09838560223579407
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,48,8,128,1,float16,fp8,0,0.0742896020412445
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,48,8,128,1,fp8,fp8,0,0.07406399846076965
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,48,1,128,1,float16,fp8,0,1.9696336746215821
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,48,1,128,1,float16,float16,0,2.253223991394043
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,48,1,128,1,fp8,fp8,0,1.9704591751098632
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,48,2,128,1,float16,fp8,0,1.9712623596191405
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,48,2,128,1,float16,float16,0,2.342398452758789
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,48,2,128,1,fp8,fp8,0,1.9693744659423829
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,48,4,128,1,float16,fp8,0,1.9715248107910157
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,48,4,128,1,float16,float16,0,2.5687984466552733
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,48,4,128,1,fp8,fp8,0,1.9689327239990235
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,48,8,128,1,float16,fp8,0,1.9693424224853515
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,48,48,128,1,float16,fp8,0,1.2593631744384766
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,48,8,128,1,fp8,fp8,0,1.971500778198242
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,48,48,128,1,fp8,fp8,0,1.377798366546631
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,48,8,128,1,float16,float16,0,3.2439823150634766
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,48,1,128,1,float16,float16,0,1.1040528297424317
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,48,1,128,1,float16,fp8,0,0.9955391883850098
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,48,1,128,1,fp8,fp8,0,0.9966032028198242
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,48,2,128,1,float16,float16,0,1.1586688041687012
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,48,2,128,1,float16,fp8,0,1.0277152061462402
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,48,48,128,1,float16,float16,0,3.8550800323486327
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,48,2,128,1,fp8,fp8,0,1.0266752243041992
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,48,4,128,1,float16,fp8,0,0.9954256057739258
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,48,4,128,1,float16,float16,0,1.292579174041748
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,48,4,128,1,fp8,fp8,0,0.9969679832458496
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,48,8,128,1,float16,fp8,0,1.0000063896179199
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,48,48,128,1,float16,fp8,0,0.6401616096496582
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,48,1,128,1,float16,float16,0,0.5604976177215576
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,48,8,128,1,float16,float16,0,1.5355423927307128
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,48,48,128,1,fp8,fp8,0,0.6815264225006104
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,48,8,128,1,fp8,fp8,0,0.9953488349914551
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,48,1,128,1,float16,fp8,0,0.5083951950073242
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,48,1,128,1,fp8,fp8,0,0.5080319881439209
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,48,2,128,1,float16,float16,0,0.5894944190979003
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,48,2,128,1,float16,fp8,0,0.5318064212799072
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,48,48,128,1,float16,float16,0,1.9463104248046874
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,48,2,128,1,fp8,fp8,0,0.5195104122161865
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,48,4,128,1,float16,float16,0,0.6540112018585205
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,48,4,128,1,float16,fp8,0,0.5084400177001953
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,48,4,128,1,fp8,fp8,0,0.5084559917449951
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,48,8,128,1,fp8,fp8,0,0.5223936080932617
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,48,8,128,1,float16,fp8,0,0.5080448150634765
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,48,48,128,1,fp8,fp8,0,0.33222720623016355
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,48,8,128,1,float16,float16,0,0.7698383808135987
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,48,48,128,1,float16,fp8,0,0.33773438930511473
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,48,1,128,1,float16,float16,0,0.297544002532959
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,48,1,128,1,float16,fp8,0,0.2657504081726074
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,48,1,128,1,fp8,fp8,0,0.2675503969192505
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,48,48,128,1,float16,float16,0,0.9871536254882812
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,48,2,128,1,float16,float16,0,0.31177759170532227
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,48,2,128,1,float16,fp8,0,0.2657344102859497
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,48,2,128,1,fp8,fp8,0,0.2655872106552124
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,48,4,128,1,float16,float16,0,0.34242401123046873
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,48,4,128,1,float16,fp8,0,0.2652960062026978
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,48,4,128,1,fp8,fp8,0,0.26574718952178955
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,48,48,128,1,float16,float16,0,0.5103695869445801
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,48,8,128,1,float16,float16,0,0.4010960102081299
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,48,1,128,1,float16,float16,0,0.16946879625320435
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,48,1,128,1,float16,fp8,0,0.1441648006439209
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,48,8,128,1,float16,fp8,0,0.267143988609314
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,48,8,128,1,fp8,fp8,0,0.2653520107269287
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,48,2,128,1,fp8,fp8,0,0.1444048047065735
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,48,48,128,1,float16,fp8,0,0.17796159982681276
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,48,48,128,1,fp8,fp8,0,0.1772063970565796
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,48,1,128,1,fp8,fp8,0,0.14459999799728393
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,48,2,128,1,float16,float16,0,0.176529598236084
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,48,2,128,1,float16,fp8,0,0.14411360025405884
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,48,4,128,1,float16,float16,0,0.19201279878616334
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,48,4,128,1,float16,fp8,0,0.14436639547348024
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,48,4,128,1,fp8,fp8,0,0.144652795791626
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,48,8,128,1,float16,float16,0,0.2190623998641968
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,48,8,128,1,float16,fp8,0,0.14447360038757323
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,48,8,128,1,fp8,fp8,0,0.1450384020805359
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,48,48,128,1,float16,fp8,0,0.10130720138549805
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,48,48,128,1,float16,float16,0,0.2729311943054199
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,48,48,128,1,fp8,fp8,0,0.10164159536361694
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,48,2,128,1,fp8,fp8,0,0.08298239707946778
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,48,1,128,1,float16,float16,0,0.09656959772109985
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,48,1,128,1,float16,fp8,0,0.0827679991722107
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,48,1,128,1,fp8,fp8,0,0.08333119750022888
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,48,2,128,1,float16,float16,0,0.1016271948814392
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,48,2,128,1,float16,fp8,0,0.08272799849510193
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,48,4,128,1,float16,float16,0,0.10664639472961426
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,48,4,128,1,float16,fp8,0,0.08375999927520753
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,48,4,128,1,fp8,fp8,0,0.08268640041351319
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,48,8,128,1,float16,float16,0,0.12319040298461914
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,48,8,128,1,float16,fp8,0,0.08298720121383667
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,48,8,128,1,fp8,fp8,0,0.08277279734611512
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,48,48,128,1,float16,float16,0,0.15176479816436766
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,48,2,128,1,float16,float16,0,0.06569439768791199
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,48,48,128,1,float16,fp8,0,0.05979359745979309
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,48,48,128,1,fp8,fp8,0,0.06008800268173218
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,48,1,128,1,float16,float16,0,0.065801602602005
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,48,1,128,1,float16,fp8,0,0.051729601621627805
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,48,1,128,1,fp8,fp8,0,0.05224800109863281
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,48,2,128,1,float16,fp8,0,0.051868802309036253
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,48,2,128,1,fp8,fp8,0,0.0518127977848053
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,48,4,128,1,float16,float16,0,0.06958079934120179
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,48,4,128,1,float16,fp8,0,0.05179200172424316
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,48,4,128,1,fp8,fp8,0,0.05215680003166199
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,48,8,128,1,float16,float16,0,0.07399359941482545
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,48,8,128,1,float16,fp8,0,0.05217440128326416
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,48,8,128,1,fp8,fp8,0,0.05237280130386353
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,48,1,128,1,float16,fp8,0,2.1274255752563476
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,48,1,128,1,float16,float16,0,2.3427024841308595
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,48,1,128,1,fp8,fp8,0,2.1266416549682616
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,48,2,128,1,float16,fp8,0,2.1276960372924805
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,48,2,128,1,fp8,fp8,0,2.1234575271606446
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,48,2,128,1,float16,float16,0,2.5244720458984373
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,48,4,128,1,float16,fp8,0,2.1231311798095702
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,48,4,128,1,float16,float16,0,2.8344655990600587
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,48,4,128,1,fp8,fp8,0,2.1240608215332033
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,48,48,128,1,float16,fp8,0,1.4190671920776368
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,48,8,128,1,float16,fp8,0,2.127414321899414
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,48,8,128,1,fp8,fp8,0,2.123276710510254
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,48,48,128,1,fp8,fp8,0,1.4889599800109863
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,48,1,128,1,float16,float16,0,1.1653152465820313
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,48,8,128,1,float16,float16,0,3.5410160064697265
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,48,1,128,1,float16,fp8,0,1.0708751678466797
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,48,1,128,1,fp8,fp8,0,1.0729056358337403
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,48,2,128,1,float16,float16,0,1.246553611755371
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,48,2,128,1,float16,fp8,0,1.0860176086425781
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,48,2,128,1,fp8,fp8,0,1.0711456298828126
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,48,4,128,1,float16,fp8,0,1.0709792137145997
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,48,4,128,1,float16,float16,0,1.404580783843994
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,48,4,128,1,fp8,fp8,0,1.07249755859375
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,48,48,128,1,float16,float16,0,4.858694458007813
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,48,8,128,1,float16,fp8,0,1.073020839691162
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,48,8,128,1,float16,float16,0,1.7963455200195313
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,48,8,128,1,fp8,fp8,0,1.0711440086364745
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,48,48,128,1,float16,fp8,0,0.7359504222869873
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,48,48,128,1,fp8,fp8,0,0.7198880195617676
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,48,1,128,1,float16,fp8,0,0.5444240093231201
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,48,1,128,1,float16,float16,0,0.5980512142181397
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,48,1,128,1,fp8,fp8,0,0.5481279850006103
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,48,2,128,1,float16,fp8,0,0.5449615955352783
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,48,2,128,1,float16,float16,0,0.6352303981781006
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,48,2,128,1,fp8,fp8,0,0.5451024055480957
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,48,4,128,1,float16,fp8,0,0.5447984218597413
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,48,4,128,1,float16,float16,0,0.7141903877258301
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,48,4,128,1,fp8,fp8,0,0.5468575954437256
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,48,48,128,1,float16,float16,0,2.428331184387207
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,48,8,128,1,float16,fp8,0,0.5447840213775634
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,48,8,128,1,float16,float16,0,0.8766672134399414
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,48,8,128,1,fp8,fp8,0,0.5461120128631591
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,48,48,128,1,float16,fp8,0,0.3682352066040039
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,48,1,128,1,float16,float16,0,0.31485440731048586
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,48,48,128,1,fp8,fp8,0,0.3687504053115845
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,48,1,128,1,float16,fp8,0,0.28135840892791747
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,48,1,128,1,fp8,fp8,0,0.28167679309844973
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,48,2,128,1,float16,float16,0,0.33297441005706785
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,48,2,128,1,float16,fp8,0,0.282478404045105
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,48,2,128,1,fp8,fp8,0,0.28141920566558837
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,48,4,128,1,float16,fp8,0,0.281825590133667
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,48,48,128,1,float16,float16,0,1.2290224075317382
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,48,4,128,1,fp8,fp8,0,0.2815727949142456
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,48,4,128,1,float16,float16,0,0.3740432024002075
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,48,8,128,1,float16,fp8,0,0.28260800838470457
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,48,8,128,1,float16,float16,0,0.4521183967590332
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,48,8,128,1,fp8,fp8,0,0.2828991889953613
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,48,48,128,1,float16,fp8,0,0.1950816035270691
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,48,48,128,1,fp8,fp8,0,0.19438079595565796
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,48,48,128,1,float16,float16,0,0.6295584201812744
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,48,1,128,1,float16,float16,0,0.17530239820480348
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,48,1,128,1,float16,fp8,0,0.15168479681015015
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,48,1,128,1,fp8,fp8,0,0.15021599531173707
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,48,4,128,1,fp8,fp8,0,0.15034719705581664
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,48,2,128,1,float16,float16,0,0.18336000442504882
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,48,2,128,1,float16,fp8,0,0.1510192036628723
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,48,2,128,1,fp8,fp8,0,0.15044159889221193
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,48,4,128,1,float16,float16,0,0.20351040363311768
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,48,48,128,1,float16,fp8,0,0.10720800161361695
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,48,4,128,1,float16,fp8,0,0.15014400482177734
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,48,8,128,1,float16,float16,0,0.24480319023132324
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,48,8,128,1,float16,fp8,0,0.15067360401153565
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,48,8,128,1,fp8,fp8,0,0.15139360427856446
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,48,48,128,1,float16,float16,0,0.33045599460601804
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,48,48,128,1,fp8,fp8,0,0.10703999996185302
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,48,1,128,1,float16,float16,0,0.10079519748687744
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,48,1,128,1,float16,fp8,0,0.08398399949073791
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,48,1,128,1,fp8,fp8,0,0.0836896002292633
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,48,2,128,1,float16,float16,0,0.11001440286636352
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,48,8,128,1,float16,float16,0,0.13837759494781493
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,48,2,128,1,float16,fp8,0,0.08353760242462158
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,48,2,128,1,fp8,fp8,0,0.08361920118331909
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,48,4,128,1,float16,float16,0,0.1178272008895874
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,48,4,128,1,float16,fp8,0,0.08368319869041443
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,48,4,128,1,fp8,fp8,0,0.08380799889564514
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,48,8,128,1,float16,fp8,0,0.08439840078353882
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,48,8,128,1,fp8,fp8,0,0.0845296025276184
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,48,48,128,1,float16,float16,0,0.18042399883270263
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,48,48,128,1,float16,fp8,0,0.0615119993686676
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,48,48,128,1,fp8,fp8,0,0.06203839778900146
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,48,1,128,1,float16,float16,0,0.06371359825134278
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,48,1,128,1,float16,fp8,0,0.05083360075950623
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,48,1,128,1,fp8,fp8,0,0.050526398420333865
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,48,2,128,1,float16,float16,0,0.0638047993183136
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,48,2,128,1,float16,fp8,0,0.05141599774360657
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,48,2,128,1,fp8,fp8,0,0.050012797117233276
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,48,4,128,1,float16,float16,0,0.06799520254135132
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,48,4,128,1,float16,fp8,0,0.05133280158042908
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,48,4,128,1,fp8,fp8,0,0.050672000646591185
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,48,8,128,1,float16,float16,0,0.07456480264663697
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,48,8,128,1,float16,fp8,0,0.05045120120048523
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,48,8,128,1,fp8,fp8,0,0.05100799798965454
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,48,48,128,1,float16,float16,0,0.08881279826164246
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,48,48,128,1,float16,fp8,0,0.04018400013446808
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,48,48,128,1,fp8,fp8,0,0.039987200498580934
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,48,1,128,1,float16,float16,0,0.04736160039901734
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,48,1,128,1,float16,fp8,0,0.03505600094795227
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,48,1,128,1,fp8,fp8,0,0.035129600763320924
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,48,2,128,1,float16,float16,0,0.047332799434661864
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,48,2,128,1,float16,fp8,0,0.03505280017852783
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,48,2,128,1,fp8,fp8,0,0.03505919873714447
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,48,4,128,1,float16,float16,0,0.047353601455688475
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,48,4,128,1,float16,fp8,0,0.035068801045417784
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,48,4,128,1,fp8,fp8,0,0.035067200660705566
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,48,8,128,1,float16,float16,0,0.053523200750350955
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,48,8,128,1,float16,fp8,0,0.03518719971179962
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,48,8,128,1,fp8,fp8,0,0.03499360084533691
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,48,1,128,1,float16,float16,0,1.7639680862426759
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,48,1,128,1,float16,fp8,0,1.6309104919433595
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,48,1,128,1,fp8,fp8,0,1.6281679153442383
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,48,2,128,1,float16,float16,0,1.9237503051757812
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,48,2,128,1,float16,fp8,0,1.6275392532348634
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,48,2,128,1,fp8,fp8,0,1.6283376693725586
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,48,4,128,1,float16,float16,0,2.247315216064453
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,48,4,128,1,float16,fp8,0,1.6388111114501953
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,48,4,128,1,fp8,fp8,0,1.626927947998047
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,48,8,128,1,float16,fp8,0,1.6235248565673828
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,48,8,128,1,fp8,fp8,0,1.6251455307006837
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,48,48,128,1,float16,fp8,0,1.1574895858764649
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,48,1,128,1,float16,fp8,0,0.8221455574035644
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,48,1,128,1,float16,float16,0,0.8978303909301758
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,48,8,128,1,float16,float16,0,2.891891288757324
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,48,48,128,1,fp8,fp8,0,1.160852813720703
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,48,1,128,1,fp8,fp8,0,0.8210512161254883
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,48,2,128,1,float16,float16,0,0.9769264221191406
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,48,2,128,1,float16,fp8,0,0.8199328422546387
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,48,2,128,1,fp8,fp8,0,0.8213232040405274
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,48,4,128,1,float16,fp8,0,0.8205632209777832
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,48,4,128,1,float16,float16,0,1.1384847640991211
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,48,4,128,1,fp8,fp8,0,0.8213520050048828
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,48,8,128,1,float16,fp8,0,0.8204000473022461
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,48,8,128,1,fp8,fp8,0,0.8191984176635743
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,48,48,128,1,float16,float16,0,4.522422409057617
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,48,48,128,1,float16,fp8,0,0.5880864143371582
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,48,1,128,1,float16,float16,0,0.46304960250854493
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,48,8,128,1,float16,float16,0,1.4572640419006349
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,48,48,128,1,fp8,fp8,0,0.587608003616333
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,48,1,128,1,float16,fp8,0,0.41831679344177247
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,48,1,128,1,fp8,fp8,0,0.41800479888916015
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,48,2,128,1,fp8,fp8,0,0.4183680057525635
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,48,2,128,1,float16,fp8,0,0.418012809753418
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,48,2,128,1,float16,float16,0,0.5024576187133789
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,48,4,128,1,float16,fp8,0,0.4178112030029297
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,48,4,128,1,float16,float16,0,0.5808864116668702
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,48,48,128,1,float16,float16,0,2.2771968841552734
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,48,4,128,1,fp8,fp8,0,0.41799359321594237
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,48,8,128,1,float16,fp8,0,0.4180592060089111
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,48,8,128,1,fp8,fp8,0,0.4178912162780762
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,48,8,128,1,float16,float16,0,0.7400271892547607
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,48,1,128,1,float16,fp8,0,0.2165168046951294
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,48,48,128,1,float16,fp8,0,0.30179519653320314
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,48,48,128,1,fp8,fp8,0,0.3011919975280762
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,48,1,128,1,float16,float16,0,0.24701919555664062
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,48,1,128,1,fp8,fp8,0,0.2168560028076172
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,48,48,128,1,float16,float16,0,1.1526864051818848
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,48,2,128,1,float16,float16,0,0.2663919925689697
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,48,2,128,1,float16,fp8,0,0.21598401069641113
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,48,2,128,1,fp8,fp8,0,0.2167135953903198
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,48,8,128,1,fp8,fp8,0,0.21663360595703124
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,48,4,128,1,float16,float16,0,0.30615839958190916
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,48,4,128,1,float16,fp8,0,0.2167680025100708
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,48,4,128,1,fp8,fp8,0,0.21639680862426758
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,48,8,128,1,float16,float16,0,0.3836303949356079
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,48,8,128,1,float16,fp8,0,0.2171839952468872
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,48,48,128,1,float16,fp8,0,0.15909119844436645
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,48,48,128,1,fp8,fp8,0,0.15920959711074828
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,48,48,128,1,float16,float16,0,0.5901391983032227
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,48,1,128,1,float16,float16,0,0.14124959707260132
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,48,1,128,1,float16,fp8,0,0.11649600267410279
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,48,1,128,1,fp8,fp8,0,0.11619839668273926
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,48,2,128,1,float16,float16,0,0.14908159971237184
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,48,2,128,1,float16,fp8,0,0.11624319553375244
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,48,2,128,1,fp8,fp8,0,0.1162559986114502
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,48,4,128,1,float16,float16,0,0.1677199959754944
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,48,4,128,1,float16,fp8,0,0.11605440378189087
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,48,4,128,1,fp8,fp8,0,0.11722400188446044
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,48,8,128,1,float16,float16,0,0.20684640407562255
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,48,8,128,1,float16,fp8,0,0.11661440134048462
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,48,8,128,1,fp8,fp8,0,0.11686240434646607
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,48,48,128,1,float16,float16,0,0.30911519527435305
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,48,48,128,1,float16,fp8,0,0.08834559917449951
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,48,48,128,1,fp8,fp8,0,0.08841120004653931
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,48,1,128,1,float16,float16,0,0.08255680203437805
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,48,4,128,1,float16,float16,0,0.098198401927948
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,48,1,128,1,float16,fp8,0,0.06520000100135803
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,48,1,128,1,fp8,fp8,0,0.06465439796447754
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,48,2,128,1,float16,float16,0,0.08829280138015747
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,48,2,128,1,float16,fp8,0,0.06486560106277466
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,48,2,128,1,fp8,fp8,0,0.06466400027275085
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,48,4,128,1,float16,fp8,0,0.06527199745178222
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,48,4,128,1,fp8,fp8,0,0.06556959748268128
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,48,8,128,1,float16,float16,0,0.11799360513687134
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,48,8,128,1,float16,fp8,0,0.06584320068359376
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,48,8,128,1,fp8,fp8,0,0.06548159718513488
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,48,48,128,1,float16,float16,0,0.16781920194625854
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,48,48,128,1,float16,fp8,0,0.0493120014667511
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,48,48,128,1,fp8,fp8,0,0.04960319995880127
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,48,1,128,1,float16,float16,0,0.0514415979385376
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,48,1,128,1,float16,fp8,0,0.03920319974422455
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,48,1,128,1,fp8,fp8,0,0.039057600498199466
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,48,2,128,1,float16,float16,0,0.05136160254478454
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,48,2,128,1,float16,fp8,0,0.03922240138053894
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,48,8,128,1,float16,fp8,0,0.03913280069828033
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,48,2,128,1,fp8,fp8,0,0.03916159868240356
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,48,4,128,1,float16,float16,0,0.05658079981803894
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,48,4,128,1,float16,fp8,0,0.03918080031871796
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,48,4,128,1,fp8,fp8,0,0.03914560079574585
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,48,1,128,1,float16,float16,0,0.039108800888061526
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,48,8,128,1,float16,float16,0,0.0620576024055481
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,48,8,128,1,fp8,fp8,0,0.03916000127792359
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,48,48,128,1,float16,float16,0,0.0839631974697113
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,48,48,128,1,float16,fp8,0,0.0317903995513916
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,48,48,128,1,fp8,fp8,0,0.0329584002494812
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,48,1,128,1,float16,fp8,0,0.026851201057434083
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,48,1,128,1,fp8,fp8,0,0.026950401067733765
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,48,2,128,1,float16,float16,0,0.03912799954414368
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,48,2,128,1,float16,fp8,0,0.026966398954391478
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,48,2,128,1,fp8,fp8,0,0.026966398954391478
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,48,48,128,1,float16,float16,0,0.04859519898891449
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,48,4,128,1,float16,float16,0,0.03928160071372986
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,48,4,128,1,float16,fp8,0,0.026815998554229736
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,48,4,128,1,fp8,fp8,0,0.026982399821281432
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,48,8,128,1,float16,float16,0,0.04334880113601684
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,48,8,128,1,float16,fp8,0,0.026814401149749756
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,48,8,128,1,fp8,fp8,0,0.026979199051856993
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,48,2,128,1,float16,fp8,0,0.018723200261592864
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,48,48,128,1,float16,fp8,0,0.020721599459648132
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,48,48,128,1,fp8,fp8,0,0.020630399882793426
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,48,1,128,1,float16,float16,0,0.030555200576782227
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,48,1,128,1,float16,fp8,0,0.018764799833297728
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,48,1,128,1,fp8,fp8,0,0.018571199476718904
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,48,2,128,1,float16,float16,0,0.028887999057769776
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,48,2,128,1,fp8,fp8,0,0.018695999681949616
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,48,4,128,1,float16,float16,0,0.028923198580741882
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,48,4,128,1,float16,fp8,0,0.01879359930753708
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,48,4,128,1,fp8,fp8,0,0.018620799481868743
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,48,8,128,1,float16,float16,0,0.03091520071029663
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,48,8,128,1,float16,fp8,0,0.018596799671649934
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,48,8,128,1,fp8,fp8,0,0.018662400543689728
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,48,1,128,1,float16,float16,0,0.7459904193878174
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,48,1,128,1,float16,fp8,0,0.6807983875274658
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,48,1,128,1,fp8,fp8,0,0.6807439804077149
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,48,2,128,1,float16,float16,0,0.8244992256164551
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,48,2,128,1,fp8,fp8,0,0.6802432060241699
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,48,2,128,1,float16,fp8,0,0.6800047874450683
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,48,4,128,1,float16,fp8,0,0.6801583766937256
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,48,4,128,1,float16,float16,0,0.9848496437072753
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,48,4,128,1,fp8,fp8,0,0.6785999774932862
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,48,8,128,1,float16,fp8,0,0.6781760215759277
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,48,8,128,1,fp8,fp8,0,0.679315185546875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,48,48,128,1,float16,fp8,0,0.5153696060180664
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,48,1,128,1,float16,fp8,0,0.3446928024291992
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,48,1,128,1,float16,float16,0,0.3865904092788696
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,48,48,128,1,fp8,fp8,0,0.5167888164520263
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,48,8,128,1,float16,float16,0,1.3037775993347167
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,48,1,128,1,fp8,fp8,0,0.3460527896881104
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,48,2,128,1,float16,float16,0,0.4243743896484375
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,48,2,128,1,float16,fp8,0,0.3455424070358276
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,48,2,128,1,fp8,fp8,0,0.34439198970794677
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,48,4,128,1,float16,float16,0,0.5018335819244385
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,48,4,128,1,float16,fp8,0,0.3453648090362549
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,48,4,128,1,fp8,fp8,0,0.3450752019882202
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,48,48,128,1,float16,float16,0,2.193294334411621
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,48,8,128,1,float16,fp8,0,0.34600799083709716
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,48,8,128,1,float16,float16,0,0.6613423824310303
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,48,8,128,1,fp8,fp8,0,0.34622080326080323
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,48,1,128,1,float16,fp8,0,0.17868160009384154
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,48,48,128,1,float16,fp8,0,0.26245119571685793
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,48,48,128,1,fp8,fp8,0,0.26390719413757324
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,48,1,128,1,float16,float16,0,0.20572319030761718
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,48,1,128,1,fp8,fp8,0,0.1780511975288391
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,48,48,128,1,float16,float16,0,1.108459186553955
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,48,2,128,1,float16,float16,0,0.22688798904418944
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,48,2,128,1,float16,fp8,0,0.17839039564132692
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,48,8,128,1,float16,float16,0,0.34169280529022217
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,48,2,128,1,fp8,fp8,0,0.1783776044845581
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,48,4,128,1,float16,float16,0,0.2640784025192261
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,48,4,128,1,float16,fp8,0,0.17798080444335937
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,48,4,128,1,fp8,fp8,0,0.1775472044944763
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,48,8,128,1,float16,fp8,0,0.17870399951934815
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,48,8,128,1,fp8,fp8,0,0.17848800420761107
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,48,48,128,1,float16,fp8,0,0.14011839628219605
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,48,48,128,1,fp8,fp8,0,0.13973599672317505
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,48,1,128,1,float16,float16,0,0.11991679668426514
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,48,48,128,1,float16,float16,0,0.5689328193664551
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,48,1,128,1,float16,fp8,0,0.09667999744415283
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,48,1,128,1,fp8,fp8,0,0.09692000150680542
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,48,2,128,1,float16,float16,0,0.12797919511795045
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,48,2,128,1,float16,fp8,0,0.09703519940376282
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,48,2,128,1,fp8,fp8,0,0.09668639898300171
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,48,4,128,1,float16,float16,0,0.14761600494384766
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,48,4,128,1,float16,fp8,0,0.09723680019378662
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,48,4,128,1,fp8,fp8,0,0.09701120257377624
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,48,8,128,1,float16,float16,0,0.1860592007637024
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,48,8,128,1,float16,fp8,0,0.09741119742393493
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,48,8,128,1,fp8,fp8,0,0.09716799855232239
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,48,48,128,1,float16,fp8,0,0.07807040214538574
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,48,48,128,1,float16,float16,0,0.2993664026260376
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,48,2,128,1,float16,float16,0,0.07818080186843872
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,48,48,128,1,fp8,fp8,0,0.07836639881134033
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,48,1,128,1,float16,float16,0,0.07082399725914001
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,48,1,128,1,float16,fp8,0,0.055499202013015746
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,48,1,128,1,fp8,fp8,0,0.055553597211837766
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,48,2,128,1,float16,fp8,0,0.055404800176620486
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,48,2,128,1,fp8,fp8,0,0.055587202310562134
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,48,4,128,1,float16,float16,0,0.0866096019744873
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,48,4,128,1,float16,fp8,0,0.05551360249519348
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,48,4,128,1,fp8,fp8,0,0.055446398258209226
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,48,8,128,1,float16,float16,0,0.10821759700775146
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,48,8,128,1,float16,fp8,0,0.05573760271072388
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,48,8,128,1,fp8,fp8,0,0.05559200048446655
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,48,48,128,1,float16,float16,0,0.16010559797286988
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,48,48,128,1,float16,fp8,0,0.043140798807144165
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,48,48,128,1,fp8,fp8,0,0.0431439995765686
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,48,1,128,1,float16,float16,0,0.043171200156211856
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,48,1,128,1,float16,fp8,0,0.0322735995054245
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,48,1,128,1,fp8,fp8,0,0.03129279911518097
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,48,4,128,1,fp8,fp8,0,0.03236320018768311
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,48,2,128,1,float16,float16,0,0.04332480132579804
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,48,2,128,1,float16,fp8,0,0.031857600808143614
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,48,2,128,1,fp8,fp8,0,0.03219679892063141
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,48,4,128,1,float16,float16,0,0.04774399995803833
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,48,4,128,1,float16,fp8,0,0.03153280019760132
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,48,8,128,1,float16,float16,0,0.054360002279281616
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,48,8,128,1,float16,fp8,0,0.03164800107479095
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,48,8,128,1,fp8,fp8,0,0.03236640095710754
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,48,48,128,1,float16,float16,0,0.07580959796905518
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,48,48,128,1,float16,fp8,0,0.028833600878715514
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,48,48,128,1,fp8,fp8,0,0.028798401355743408
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,48,1,128,1,float16,float16,0,0.03501279950141907
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,48,1,128,1,float16,fp8,0,0.02290399968624115
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,48,1,128,1,fp8,fp8,0,0.02280000001192093
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,48,2,128,1,float16,float16,0,0.034974399209022525
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,48,2,128,1,float16,fp8,0,0.022844800353050233
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,48,8,128,1,fp8,fp8,0,0.02279199957847595
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,48,2,128,1,fp8,fp8,0,0.02268480062484741
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,48,4,128,1,float16,float16,0,0.035025599598884585
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,48,4,128,1,float16,fp8,0,0.02285760045051575
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,48,4,128,1,fp8,fp8,0,0.02294880002737045
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,48,8,128,1,float16,float16,0,0.0400191992521286
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,48,1,128,1,fp8,fp8,0,0.014710399508476257
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,48,8,128,1,float16,fp8,0,0.02285760045051575
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,48,48,128,1,float16,float16,0,0.04524640142917633
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,48,48,128,1,float16,fp8,0,0.01855359971523285
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,48,48,128,1,fp8,fp8,0,0.01867839992046356
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,48,1,128,1,float16,float16,0,0.025945600867271424
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,48,1,128,1,float16,fp8,0,0.015503999590873719
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,48,2,128,1,float16,float16,0,0.026846399903297423
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,48,2,128,1,float16,fp8,0,0.014767999947071075
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,48,2,128,1,fp8,fp8,0,0.014582400023937226
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,48,4,128,1,float16,float16,0,0.02682879865169525
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,48,4,128,1,float16,fp8,0,0.01624480038881302
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,48,4,128,1,fp8,fp8,0,0.014830400049686433
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,48,1,128,1,float16,float16,0,0.02483679950237274
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,48,8,128,1,float16,float16,0,0.02683520019054413
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,48,8,128,1,float16,fp8,0,0.016495999693870545
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,48,8,128,1,fp8,fp8,0,0.015171200037002563
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,48,48,128,1,float16,float16,0,0.033055999875068666
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,48,48,128,1,float16,fp8,0,0.01655520051717758
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,48,48,128,1,fp8,fp8,0,0.016755199432373045
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,48,1,128,1,float16,fp8,0,0.014593599736690522
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,48,1,128,1,fp8,fp8,0,0.014561599493026734
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,48,2,128,1,float16,float16,0,0.0247311994433403
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,48,8,128,1,float16,fp8,0,0.014564800262451171
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,48,2,128,1,float16,fp8,0,0.014524799585342408
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,48,2,128,1,fp8,fp8,0,0.014571200311183929
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,48,4,128,1,float16,float16,0,0.02486239969730377
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,48,4,128,1,float16,fp8,0,0.014584000408649444
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,48,4,128,1,fp8,fp8,0,0.014641599357128143
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,48,8,128,1,float16,float16,0,0.02476480007171631
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,48,8,128,1,fp8,fp8,0,0.01467359960079193
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,48,1,128,1,float16,float16,0,0.45310077667236326
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,48,1,128,1,float16,fp8,0,0.4088575839996338
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,48,1,128,1,fp8,fp8,0,0.40836319923400877
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,48,2,128,1,float16,float16,0,0.4918479919433594
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,48,2,128,1,float16,fp8,0,0.40799360275268554
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,48,2,128,1,fp8,fp8,0,0.4084911823272705
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,48,4,128,1,float16,fp8,0,0.4075888156890869
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,48,8,128,1,float16,fp8,0,0.407260799407959
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,48,8,128,1,fp8,fp8,0,0.4080848217010498
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,48,4,128,1,float16,float16,0,0.5680399894714355
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,48,4,128,1,fp8,fp8,0,0.40797438621521
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,48,8,128,1,float16,float16,0,0.7264944076538086
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,48,48,128,1,float16,fp8,0,0.29482240676879884
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,48,48,128,1,fp8,fp8,0,0.29369120597839354
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,48,2,128,1,float16,float16,0,0.25972800254821776
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,48,1,128,1,float16,float16,0,0.2391439914703369
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,48,1,128,1,float16,fp8,0,0.20973119735717774
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,48,1,128,1,fp8,fp8,0,0.21015520095825196
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,48,48,128,1,float16,float16,0,1.1393952369689941
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,48,2,128,1,float16,fp8,0,0.2093791961669922
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,48,2,128,1,fp8,fp8,0,0.20964159965515136
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,48,4,128,1,float16,float16,0,0.2972944021224976
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,48,4,128,1,float16,fp8,0,0.20975039005279542
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,48,4,128,1,fp8,fp8,0,0.20965919494628907
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,48,8,128,1,float16,float16,0,0.3731872081756592
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,48,8,128,1,float16,fp8,0,0.20986080169677734
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,48,8,128,1,fp8,fp8,0,0.2095247983932495
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,48,48,128,1,float16,fp8,0,0.15268959999084472
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,48,1,128,1,fp8,fp8,0,0.11022080183029175
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,48,48,128,1,float16,float16,0,0.5811039924621582
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,48,48,128,1,fp8,fp8,0,0.15248960256576538
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,48,1,128,1,float16,float16,0,0.13412959575653077
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,48,1,128,1,float16,fp8,0,0.11001759767532349
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,48,2,128,1,float16,float16,0,0.1422544002532959
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,48,2,128,1,float16,fp8,0,0.11021440029144287
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,48,2,128,1,fp8,fp8,0,0.1100767970085144
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,48,4,128,1,float16,float16,0,0.16219040155410766
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,48,4,128,1,float16,fp8,0,0.10998400449752807
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,48,4,128,1,fp8,fp8,0,0.10953919887542725
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,48,8,128,1,float16,float16,0,0.19982399940490722
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,48,8,128,1,float16,fp8,0,0.10968159437179566
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,48,8,128,1,fp8,fp8,0,0.1093392014503479
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,48,48,128,1,float16,fp8,0,0.08254240155220031
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,48,48,128,1,float16,float16,0,0.3030927896499634
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,48,48,128,1,fp8,fp8,0,0.0831712007522583
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,48,1,128,1,float16,float16,0,0.07833600044250488
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,48,1,128,1,float16,fp8,0,0.05974879860877991
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,48,1,128,1,fp8,fp8,0,0.05960800051689148
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,48,2,128,1,float16,float16,0,0.08496159911155701
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,48,2,128,1,float16,fp8,0,0.059601598978042604
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,48,2,128,1,fp8,fp8,0,0.06012639999389648
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,48,4,128,1,float16,float16,0,0.09353439807891846
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,48,4,128,1,float16,fp8,0,0.059699201583862306
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,48,4,128,1,fp8,fp8,0,0.059761601686477664
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,48,8,128,1,float16,float16,0,0.11296160221099853
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,48,8,128,1,float16,fp8,0,0.06090080142021179
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,48,8,128,1,fp8,fp8,0,0.060096001625061034
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,48,48,128,1,float16,float16,0,0.16252479553222657
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,48,48,128,1,float16,fp8,0,0.04530560076236725
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,48,48,128,1,fp8,fp8,0,0.04565599858760834
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,48,1,128,1,float16,float16,0,0.04727199971675873
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,48,1,128,1,float16,fp8,0,0.034985598921775815
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,48,1,128,1,fp8,fp8,0,0.03506079912185669
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,48,2,128,1,float16,float16,0,0.04737119972705841
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,48,2,128,1,float16,fp8,0,0.03517279922962189
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,48,2,128,1,fp8,fp8,0,0.034995201230049136
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,48,4,128,1,float16,float16,0,0.05204960107803345
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,48,4,128,1,float16,fp8,0,0.0350271999835968
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,48,4,128,1,fp8,fp8,0,0.03517279922962189
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,48,8,128,1,float16,float16,0,0.061622399091720584
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,48,8,128,1,float16,fp8,0,0.03516800105571747
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,48,8,128,1,fp8,fp8,0,0.035104000568389894
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,48,48,128,1,float16,float16,0,0.0751856029033661
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,48,48,128,1,float16,fp8,0,0.026900801062583923
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,48,48,128,1,fp8,fp8,0,0.026748800277709962
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,48,1,128,1,float16,float16,0,0.033327999711036685
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,48,1,128,1,float16,fp8,0,0.021236799657344818
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,48,4,128,1,fp8,fp8,0,0.0207056000828743
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,48,1,128,1,fp8,fp8,0,0.02106720060110092
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,48,2,128,1,float16,float16,0,0.03312320113182068
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,48,2,128,1,float16,fp8,0,0.020849600434303284
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,48,2,128,1,fp8,fp8,0,0.020895999670028687
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,48,4,128,1,float16,float16,0,0.03303999900817871
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,48,4,128,1,float16,fp8,0,0.0208064004778862
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,48,8,128,1,float16,float16,0,0.03928000032901764
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,48,8,128,1,float16,fp8,0,0.0208639994263649
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,48,8,128,1,fp8,fp8,0,0.02158239930868149
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,48,48,128,1,float16,float16,0,0.04592480063438416
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,48,48,128,1,float16,fp8,0,0.01889919936656952
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,48,48,128,1,fp8,fp8,0,0.018688000738620758
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,48,1,128,1,float16,float16,0,0.02690559923648834
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,48,1,128,1,float16,fp8,0,0.01706400066614151
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,48,1,128,1,fp8,fp8,0,0.016568000614643096
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,48,2,128,1,float16,float16,0,0.027423998713493346
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,48,2,128,1,float16,fp8,0,0.01656640022993088
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,48,2,128,1,fp8,fp8,0,0.016595199704170227
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,48,4,128,1,float16,float16,0,0.027169600129127502
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,48,4,128,1,float16,fp8,0,0.016540800034999848
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,48,4,128,1,fp8,fp8,0,0.01652960032224655
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,48,8,128,1,float16,float16,0,0.028815999627113342
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,48,8,128,1,float16,fp8,0,0.016676799952983858
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,48,8,128,1,fp8,fp8,0,0.016633599996566772
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,48,48,128,1,float16,float16,0,0.03076159954071045
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,48,48,128,1,float16,fp8,0,0.012451200187206269
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,48,2,128,1,fp8,fp8,0,0.012436799705028534
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,48,48,128,1,fp8,fp8,0,0.012643200159072877
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,48,1,128,1,float16,float16,0,0.022679999470710754
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,48,1,128,1,float16,fp8,0,0.011433599889278412
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,48,1,128,1,fp8,fp8,0,0.011609599739313126
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,48,2,128,1,float16,float16,0,0.020854400098323823
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,48,2,128,1,float16,fp8,0,0.01202400028705597
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,48,4,128,1,float16,float16,0,0.020712000131607056
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,48,48,128,1,float16,fp8,0,0.012534399330615998
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,48,4,128,1,float16,fp8,0,0.012495999783277511
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,48,4,128,1,fp8,fp8,0,0.011380799859762192
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,48,8,128,1,float16,float16,0,0.022726400196552275
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,48,1,128,1,fp8,fp8,0,0.010958400368690491
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,48,8,128,1,float16,fp8,0,0.012081599980592727
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,48,8,128,1,fp8,fp8,0,0.012008000165224075
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,48,48,128,1,float16,float16,0,0.022732800245285033
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,48,48,128,1,fp8,fp8,0,0.011100800335407257
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,48,1,128,1,float16,float16,0,0.020734399557113647
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,48,1,128,1,float16,fp8,0,0.010539200156927109
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,48,2,128,1,float16,float16,0,0.020683200657367708
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,48,2,128,1,float16,fp8,0,0.01098719984292984
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,48,2,128,1,fp8,fp8,0,0.010419200360774993
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,48,4,128,1,float16,float16,0,0.020739200711250304
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,48,4,128,1,float16,fp8,0,0.010596799850463866
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,48,4,128,1,fp8,fp8,0,0.010491199791431427
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,48,8,128,1,float16,float16,0,0.0207056000828743
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,48,8,128,1,float16,fp8,0,0.010952000319957734
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,48,8,128,1,fp8,fp8,0,0.010824000090360641
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,48,1,128,1,float16,float16,0,0.3550704002380371
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,48,1,128,1,float16,fp8,0,0.3135135889053345
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,48,1,128,1,fp8,fp8,0,0.3135663986206055
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,48,2,128,1,float16,float16,0,0.373854398727417
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,48,2,128,1,float16,fp8,0,0.313590407371521
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,48,4,128,1,fp8,fp8,0,0.3126863956451416
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,48,2,128,1,fp8,fp8,0,0.3133968114852905
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,48,4,128,1,float16,float16,0,0.41164321899414064
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,48,8,128,1,float16,float16,0,0.4899951934814453
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,48,4,128,1,float16,fp8,0,0.31393439769744874
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,48,8,128,1,float16,fp8,0,0.3123023986816406
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,48,8,128,1,fp8,fp8,0,0.31259520053863527
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,48,48,128,1,float16,fp8,0,0.20409278869628905
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,48,48,128,1,fp8,fp8,0,0.2040623903274536
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,48,1,128,1,float16,float16,0,0.19194079637527467
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,48,2,128,1,float16,fp8,0,0.1620911955833435
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,48,2,128,1,fp8,fp8,0,0.162281596660614
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,48,48,128,1,float16,float16,0,0.6409296035766602
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,48,1,128,1,float16,fp8,0,0.1625391960144043
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,48,1,128,1,fp8,fp8,0,0.162118399143219
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,48,2,128,1,float16,float16,0,0.1995360016822815
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,48,4,128,1,float16,float16,0,0.21973280906677245
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,48,4,128,1,float16,fp8,0,0.1623103976249695
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,48,4,128,1,fp8,fp8,0,0.16225279569625856
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,48,8,128,1,float16,float16,0,0.25789918899536135
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,48,8,128,1,float16,fp8,0,0.16304479837417601
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,48,8,128,1,fp8,fp8,0,0.1621567964553833
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,48,48,128,1,float16,fp8,0,0.10836479663848878
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,48,48,128,1,float16,float16,0,0.33169600963592527
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,48,48,128,1,fp8,fp8,0,0.10784159898757935
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,48,1,128,1,float16,float16,0,0.10758719444274903
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,48,1,128,1,float16,fp8,0,0.08511840105056763
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,48,1,128,1,fp8,fp8,0,0.08619359731674195
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,48,2,128,1,float16,float16,0,0.11388959884643554
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,48,2,128,1,float16,fp8,0,0.0857200026512146
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,48,2,128,1,fp8,fp8,0,0.085726398229599
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,48,4,128,1,float16,float16,0,0.12254400253295898
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,48,4,128,1,float16,fp8,0,0.08520159721374512
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,48,4,128,1,fp8,fp8,0,0.08613280057907105
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,48,8,128,1,float16,float16,0,0.14151519536972046
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,48,8,128,1,float16,fp8,0,0.08649119734764099
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,48,8,128,1,fp8,fp8,0,0.08594239950180053
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,48,48,128,1,float16,float16,0,0.1767359972000122
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,48,48,128,1,float16,fp8,0,0.05796319842338562
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,48,48,128,1,fp8,fp8,0,0.05884000062942505
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,48,1,128,1,float16,float16,0,0.061238402128219606
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,48,1,128,1,float16,fp8,0,0.04753600060939789
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,48,1,128,1,fp8,fp8,0,0.04786399900913239
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,48,2,128,1,float16,float16,0,0.06191679835319519
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,48,8,128,1,float16,float16,0,0.07384639978408813
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,48,2,128,1,float16,fp8,0,0.04737119972705841
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,48,2,128,1,fp8,fp8,0,0.04734559953212738
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,48,4,128,1,float16,float16,0,0.06669759750366211
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,48,4,128,1,float16,fp8,0,0.047307199239730834
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,48,4,128,1,fp8,fp8,0,0.047391998767852786
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,48,8,128,1,float16,fp8,0,0.047679999470710756
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,48,8,128,1,fp8,fp8,0,0.047470399737358095
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,48,48,128,1,float16,float16,0,0.08526560068130493
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,48,48,128,1,float16,fp8,0,0.0333296000957489
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,48,48,128,1,fp8,fp8,0,0.033318400382995605
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,48,1,128,1,float16,float16,0,0.041206398606300355
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,48,1,128,1,float16,fp8,0,0.028835201263427736
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,48,1,128,1,fp8,fp8,0,0.029046401381492615
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,48,2,128,1,float16,float16,0,0.04142720103263855
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,48,8,128,1,float16,fp8,0,0.02892639935016632
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,48,2,128,1,float16,fp8,0,0.028838399052619933
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,48,2,128,1,fp8,fp8,0,0.02892799973487854
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,48,4,128,1,float16,float16,0,0.041705599427223204
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,48,4,128,1,float16,fp8,0,0.028828799724578857
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,48,4,128,1,fp8,fp8,0,0.029046401381492615
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,48,8,128,1,float16,float16,0,0.04644320011138916
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,48,8,128,1,fp8,fp8,0,0.028886398673057555
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,48,48,128,1,float16,float16,0,0.04937280118465424
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,48,48,128,1,float16,fp8,0,0.020737600326538087
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,48,48,128,1,fp8,fp8,0,0.020795199275016784
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,48,1,128,1,float16,float16,0,0.030881598591804504
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,48,1,128,1,float16,fp8,0,0.01858399957418442
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,48,1,128,1,fp8,fp8,0,0.018670399487018586
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,48,2,128,1,float16,float16,0,0.030857598781585692
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,48,2,128,1,float16,fp8,0,0.01855680048465729
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,48,2,128,1,fp8,fp8,0,0.018747200071811677
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,48,4,128,1,float16,float16,0,0.031004801392555237
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,48,4,128,1,float16,fp8,0,0.018587200343608855
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,48,4,128,1,fp8,fp8,0,0.018607999384403228
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,48,8,128,1,float16,float16,0,0.030884799361228944
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,48,8,128,1,float16,fp8,0,0.018673600256443025
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,48,8,128,1,fp8,fp8,0,0.018606400489807128
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,48,48,128,1,float16,float16,0,0.03312479853630066
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,48,48,128,1,float16,fp8,0,0.016092799603939056
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,48,48,128,1,fp8,fp8,0,0.01562879979610443
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,48,1,128,1,float16,float16,0,0.024803200364112855
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,48,1,128,1,float16,fp8,0,0.014473600685596466
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,48,1,128,1,fp8,fp8,0,0.014508800208568573
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,48,2,128,1,float16,float16,0,0.024916799366474153
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,48,2,128,1,float16,fp8,0,0.014507199823856353
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,48,2,128,1,fp8,fp8,0,0.014561599493026734
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,48,4,128,1,float16,float16,0,0.024875199794769286
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,48,4,128,1,float16,fp8,0,0.014550399780273438
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,48,48,128,1,float16,fp8,0,0.011383999884128571
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,48,4,128,1,fp8,fp8,0,0.014472000300884247
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,48,8,128,1,float16,float16,0,0.024809600412845613
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,48,8,128,1,float16,fp8,0,0.014595200121402741
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,48,8,128,1,fp8,fp8,0,0.01465120017528534
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,48,48,128,1,float16,float16,0,0.024695999920368195
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,48,48,128,1,fp8,fp8,0,0.0110895998775959
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,48,1,128,1,float16,float16,0,0.020598399639129638
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,48,1,128,1,float16,fp8,0,0.010412800312042236
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,48,1,128,1,fp8,fp8,0,0.010795199871063232
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,48,2,128,1,float16,float16,0,0.021118399500846863
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,48,2,128,1,float16,fp8,0,0.01050880029797554
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,48,2,128,1,fp8,fp8,0,0.010407999902963639
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,48,4,128,1,float16,float16,0,0.020695999264717102
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,48,4,128,1,float16,fp8,0,0.010499200224876404
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,48,4,128,1,fp8,fp8,0,0.010398399829864503
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,48,8,128,1,float16,float16,0,0.020791999995708466
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,48,8,128,1,fp8,fp8,0,0.01040479987859726
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,48,8,128,1,float16,fp8,0,0.010460799932479859
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,48,48,128,1,float16,float16,0,0.01961439996957779
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,48,48,128,1,float16,fp8,0,0.010516799986362457
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,48,48,128,1,fp8,fp8,0,0.010395199805498124
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,48,1,128,1,float16,float16,0,0.018691200017929076
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,48,1,128,1,float16,fp8,0,0.01048159971833229
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,48,1,128,1,fp8,fp8,0,0.010412800312042236
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,48,4,128,1,fp8,fp8,0,0.010487999767065048
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,48,2,128,1,float16,float16,0,0.018772800266742707
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,48,2,128,1,float16,fp8,0,0.010438399761915207
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,48,2,128,1,fp8,fp8,0,0.010422399640083313
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,48,4,128,1,float16,float16,0,0.018668800592422485
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,48,4,128,1,float16,fp8,0,0.010414399951696397
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,48,8,128,1,float16,float16,0,0.01868959963321686
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,48,8,128,1,float16,fp8,0,0.0104592002928257
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,48,8,128,1,fp8,fp8,0,0.010390400141477584
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,48,1,128,1,float16,float16,0,0.31958880424499514
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,48,1,128,1,float16,fp8,0,0.27614240646362304
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,48,1,128,1,fp8,fp8,0,0.276476788520813
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,48,2,128,1,float16,float16,0,0.3222928047180176
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,48,4,128,1,fp8,fp8,0,0.27570879459381104
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,48,2,128,1,float16,fp8,0,0.2761055946350098
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,48,2,128,1,fp8,fp8,0,0.2765568017959595
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,48,4,128,1,float16,float16,0,0.3425152063369751
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,48,4,128,1,float16,fp8,0,0.27565760612487794
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,48,8,128,1,float16,float16,0,0.38131840229034425
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,48,8,128,1,float16,fp8,0,0.27660160064697265
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,48,8,128,1,fp8,fp8,0,0.27626240253448486
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,48,1,128,1,fp8,fp8,0,0.14192320108413697
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,48,48,128,1,float16,fp8,0,0.16381920576095582
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,48,48,128,1,float16,float16,0,0.39737439155578613
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,48,48,128,1,fp8,fp8,0,0.16403039693832397
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,48,1,128,1,float16,float16,0,0.16958400011062622
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,48,1,128,1,float16,fp8,0,0.1414080023765564
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,48,2,128,1,float16,float16,0,0.1806112051010132
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,48,2,128,1,float16,fp8,0,0.1418063998222351
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,48,2,128,1,fp8,fp8,0,0.14189280271530152
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,48,8,128,1,fp8,fp8,0,0.14312000274658204
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,48,4,128,1,float16,float16,0,0.1839311957359314
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,48,4,128,1,float16,fp8,0,0.14160000085830687
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,48,4,128,1,fp8,fp8,0,0.14198720455169678
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,48,8,128,1,float16,float16,0,0.2035680055618286
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,48,8,128,1,float16,fp8,0,0.14191839694976807
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,48,48,128,1,float16,float16,0,0.20914239883422853
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,48,48,128,1,float16,fp8,0,0.08645280003547669
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,48,48,128,1,fp8,fp8,0,0.08677759766578674
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,48,1,128,1,float16,float16,0,0.09271199703216552
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,48,1,128,1,float16,fp8,0,0.07498239874839782
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,48,4,128,1,float16,fp8,0,0.0756608009338379
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,48,1,128,1,fp8,fp8,0,0.07461119890213012
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,48,2,128,1,float16,float16,0,0.09298400282859802
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,48,2,128,1,float16,fp8,0,0.07477120161056519
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,48,2,128,1,fp8,fp8,0,0.0745631992816925
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,48,4,128,1,float16,float16,0,0.09902880191802979
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,48,4,128,1,fp8,fp8,0,0.07471519708633423
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,48,8,128,1,float16,float16,0,0.10719039440155029
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,48,8,128,1,float16,fp8,0,0.07553120255470276
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,48,8,128,1,fp8,fp8,0,0.07521920204162598
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,48,48,128,1,float16,float16,0,0.10724960565567017
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,48,48,128,1,float16,fp8,0,0.04738560020923614
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,48,48,128,1,fp8,fp8,0,0.04735200107097626
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,48,1,128,1,float16,float16,0,0.057499200105667114
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,48,1,128,1,float16,fp8,0,0.0420879989862442
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,48,1,128,1,fp8,fp8,0,0.042849600315093994
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,48,2,128,1,float16,float16,0,0.05755360126495361
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,48,2,128,1,float16,fp8,0,0.04219039976596832
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,48,2,128,1,fp8,fp8,0,0.04265280067920685
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,48,4,128,1,float16,float16,0,0.05762240290641785
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,48,4,128,1,float16,fp8,0,0.04202240109443665
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,48,4,128,1,fp8,fp8,0,0.042361599206924436
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,48,8,128,1,float16,float16,0,0.06214399933815003
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,48,8,128,1,float16,fp8,0,0.04323199987411499
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,48,8,128,1,fp8,fp8,0,0.04290400147438049
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,48,48,128,1,float16,float16,0,0.05773760080337524
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,48,48,128,1,float16,fp8,0,0.028862398862838746
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,48,48,128,1,fp8,fp8,0,0.028889599442481994
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,48,1,128,1,float16,float16,0,0.03924480080604553
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,48,1,128,1,float16,fp8,0,0.026124799251556398
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,48,1,128,1,fp8,fp8,0,0.026785600185394286
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,48,4,128,1,fp8,fp8,0,0.026150399446487428
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,48,8,128,1,float16,float16,0,0.03945760130882263
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,48,2,128,1,float16,float16,0,0.03919520080089569
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,48,2,128,1,float16,fp8,0,0.026414400339126586
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,48,2,128,1,fp8,fp8,0,0.026686400175094604
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,48,4,128,1,float16,float16,0,0.03915359973907471
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,48,4,128,1,float16,fp8,0,0.02696160078048706
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,48,8,128,1,float16,fp8,0,0.0265392005443573
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,48,8,128,1,fp8,fp8,0,0.026974400877952574
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,48,48,128,1,float16,float16,0,0.03702239990234375
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,48,48,128,1,float16,fp8,0,0.01860480010509491
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,48,2,128,1,float16,fp8,0,0.016654400527477263
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,48,48,128,1,fp8,fp8,0,0.018700799345970152
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,48,1,128,1,float16,float16,0,0.02863999903202057
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,48,1,128,1,float16,fp8,0,0.016631999611854555
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,48,1,128,1,fp8,fp8,0,0.016648000478744505
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,48,2,128,1,float16,float16,0,0.026851201057434083
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,48,2,128,1,fp8,fp8,0,0.016574400663375854
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,48,4,128,1,float16,float16,0,0.027638399600982667
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,48,4,128,1,float16,fp8,0,0.016628800332546233
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,48,4,128,1,fp8,fp8,0,0.016550399363040924
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,48,8,128,1,float16,float16,0,0.028907200694084166
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,48,8,128,1,float16,fp8,0,0.016524800658226015
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,48,8,128,1,fp8,fp8,0,0.016711999475955964
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,48,48,128,1,float16,float16,0,0.0268095999956131
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,48,48,128,1,float16,fp8,0,0.0144896000623703
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,48,48,128,1,fp8,fp8,0,0.014529600739479065
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,48,1,128,1,float16,float16,0,0.02280000001192093
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,48,1,128,1,float16,fp8,0,0.013327999413013459
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,48,1,128,1,fp8,fp8,0,0.013059200346469879
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,48,2,128,1,float16,float16,0,0.0227183997631073
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,48,2,128,1,float16,fp8,0,0.01271039992570877
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,48,2,128,1,fp8,fp8,0,0.012516799569129943
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,48,4,128,1,float16,float16,0,0.022711999714374542
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,48,4,128,1,float16,fp8,0,0.012558400630950928
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,48,4,128,1,fp8,fp8,0,0.012936000525951386
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,48,8,128,1,float16,float16,0,0.02311840057373047
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,48,8,128,1,float16,fp8,0,0.01263359934091568
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,48,8,128,1,fp8,fp8,0,0.012675200402736665
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,48,48,128,1,float16,float16,0,0.01879359930753708
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,48,48,128,1,float16,fp8,0,0.010414399951696397
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,48,48,128,1,fp8,fp8,0,0.010499200224876404
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,48,1,128,1,float16,float16,0,0.01879200041294098
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,48,1,128,1,float16,fp8,0,0.010412800312042236
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,48,1,128,1,fp8,fp8,0,0.010419200360774993
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,48,2,128,1,float16,float16,0,0.018900799751281738
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,48,2,128,1,float16,fp8,0,0.01037440001964569
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,48,2,128,1,fp8,fp8,0,0.010419200360774993
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,48,4,128,1,float16,float16,0,0.019108800590038298
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,48,4,128,1,float16,fp8,0,0.010377600044012069
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,48,4,128,1,fp8,fp8,0,0.010494399815797806
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,48,8,128,1,float16,float16,0,0.018622399866580965
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,48,8,128,1,float16,fp8,0,0.01053759977221489
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,48,8,128,1,fp8,fp8,0,0.010460799932479859
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,48,48,128,1,float16,float16,0,0.01873279958963394
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,48,2,128,1,float16,fp8,0,0.010390400141477584
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,48,48,128,1,float16,fp8,0,0.010467199981212616
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,48,4,128,1,float16,float16,0,0.018775999546051025
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,48,48,128,1,fp8,fp8,0,0.010420800000429154
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,48,1,128,1,float16,float16,0,0.018571199476718904
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,48,1,128,1,float16,fp8,0,0.010382399708032609
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,48,1,128,1,fp8,fp8,0,0.0104032002389431
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,48,2,128,1,float16,float16,0,0.018700799345970152
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,48,2,128,1,fp8,fp8,0,0.010425599664449692
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,48,4,128,1,float16,fp8,0,0.009174399822950364
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,48,1,128,1,float16,fp8,0,0.25345120429992674
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,48,4,128,1,fp8,fp8,0,0.010360000282526016
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,48,8,128,1,float16,float16,0,0.01879359930753708
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,48,8,128,1,float16,fp8,0,0.010377600044012069
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,48,8,128,1,fp8,fp8,0,0.010494399815797806
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,48,1,128,1,float16,float16,0,0.313919997215271
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,48,1,128,1,fp8,fp8,0,0.2557391881942749
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,48,4,128,1,float16,fp8,0,0.25348639488220215
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,48,2,128,1,float16,float16,0,0.32403519153594973
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,48,2,128,1,float16,fp8,0,0.25384159088134767
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,48,8,128,1,float16,fp8,0,0.2558687925338745
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,48,2,128,1,fp8,fp8,0,0.2527616024017334
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,48,4,128,1,float16,float16,0,0.3275360107421875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,48,4,128,1,fp8,fp8,0,0.25308640003204347
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,48,8,128,1,float16,float16,0,0.34836640357971194
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,48,8,128,1,fp8,fp8,0,0.25382080078125
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,48,48,128,1,float16,float16,0,0.28235840797424316
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,48,48,128,1,float16,fp8,0,0.14277119636535646
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,48,48,128,1,fp8,fp8,0,0.14371520280838013
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,48,1,128,1,float16,float16,0,0.16509439945220947
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,48,1,128,1,float16,fp8,0,0.13056319952011108
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,48,1,128,1,fp8,fp8,0,0.1304543972015381
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,48,4,128,1,fp8,fp8,0,0.13017760515213012
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,48,2,128,1,float16,float16,0,0.16438080072402955
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,48,8,128,1,float16,float16,0,0.178603196144104
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,48,2,128,1,float16,fp8,0,0.1299504041671753
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,48,2,128,1,fp8,fp8,0,0.13052159547805786
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,48,4,128,1,float16,float16,0,0.17100800275802613
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,48,4,128,1,float16,fp8,0,0.13194559812545775
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,48,8,128,1,float16,fp8,0,0.13036320209503174
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,48,8,128,1,fp8,fp8,0,0.13089280128479003
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,48,48,128,1,float16,fp8,0,0.07592960000038147
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,48,48,128,1,float16,float16,0,0.13978240489959717
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,48,48,128,1,fp8,fp8,0,0.07559840083122253
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,48,1,128,1,float16,float16,0,0.09417920112609864
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,48,1,128,1,float16,fp8,0,0.07027360200881957
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,48,1,128,1,fp8,fp8,0,0.07089599967002869
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,48,2,128,1,float16,float16,0,0.09354559779167175
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,48,2,128,1,float16,fp8,0,0.07050560116767883
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,48,2,128,1,fp8,fp8,0,0.07060480117797852
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,48,4,128,1,float16,float16,0,0.09368320107460022
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,48,4,128,1,float16,fp8,0,0.07021440267562866
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,48,4,128,1,fp8,fp8,0,0.0702239990234375
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,48,8,128,1,float16,float16,0,0.09971519708633422
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,48,8,128,1,float16,fp8,0,0.07046080231666565
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,48,8,128,1,fp8,fp8,0,0.07046080231666565
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,48,48,128,1,float16,float16,0,0.07580159902572632
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,48,48,128,1,float16,fp8,0,0.04317759871482849
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,48,48,128,1,fp8,fp8,0,0.04327360093593598
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,48,1,128,1,float16,float16,0,0.05754079818725586
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,48,1,128,1,float16,fp8,0,0.04020319879055023
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,48,1,128,1,fp8,fp8,0,0.039689600467681885
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,48,2,128,1,float16,float16,0,0.057596802711486816
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,48,2,128,1,float16,fp8,0,0.04062399864196777
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,48,2,128,1,fp8,fp8,0,0.039766401052474976
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,48,4,128,1,float16,float16,0,0.05758240222930908
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,48,4,128,1,float16,fp8,0,0.04067679941654205
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,48,4,128,1,fp8,fp8,0,0.04054880142211914
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,48,8,128,1,float16,float16,0,0.0575984001159668
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,48,8,128,1,float16,fp8,0,0.040324801206588747
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,48,8,128,1,fp8,fp8,0,0.04031359851360321
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,48,48,128,1,float16,float16,0,0.04550240039825439
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,48,48,128,1,float16,fp8,0,0.02635039985179901
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,48,48,128,1,fp8,fp8,0,0.026713600754737853
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,48,1,128,1,float16,float16,0,0.03715519905090332
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,48,1,128,1,float16,fp8,0,0.024831999838352204
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,48,1,128,1,fp8,fp8,0,0.024740800261497498
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,48,2,128,1,float16,fp8,0,0.02486560046672821
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,48,2,128,1,fp8,fp8,0,0.02497600018978119
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,48,2,128,1,float16,float16,0,0.03728159964084625
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,48,4,128,1,float16,float16,0,0.037136000394821164
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,48,4,128,1,float16,fp8,0,0.024742400646209715
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,48,4,128,1,fp8,fp8,0,0.024860799312591553
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,48,8,128,1,float16,float16,0,0.037894400954246524
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,48,1,128,1,float16,float16,0,0.026950401067733765
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,48,8,128,1,float16,fp8,0,0.024751999974250795
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,48,8,128,1,fp8,fp8,0,0.02492479979991913
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,48,48,128,1,float16,float16,0,0.030913600325584413
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,48,48,128,1,float16,fp8,0,0.016633599996566772
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,48,48,128,1,fp8,fp8,0,0.016553600132465363
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,48,1,128,1,float16,fp8,0,0.01659359931945801
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,48,1,128,1,fp8,fp8,0,0.016715200245380403
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,48,2,128,1,float16,float16,0,0.026900801062583923
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,48,2,128,1,float16,fp8,0,0.016550399363040924
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,48,2,128,1,fp8,fp8,0,0.016680000722408293
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,48,4,128,1,float16,float16,0,0.02704479992389679
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,48,4,128,1,float16,fp8,0,0.016491200029850005
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,48,4,128,1,fp8,fp8,0,0.016678400337696075
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,48,1,128,1,float16,float16,0,0.02266079932451248
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,48,8,128,1,float16,float16,0,0.02680160105228424
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,48,8,128,1,float16,fp8,0,0.016705599427223206
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,48,8,128,1,fp8,fp8,0,0.016553600132465363
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,48,48,128,1,float16,float16,0,0.022724799811840057
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,48,2,128,1,fp8,fp8,0,0.012561599910259246
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,48,48,128,1,float16,fp8,0,0.012571200728416443
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,48,4,128,1,float16,float16,0,0.02266719937324524
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,48,48,128,1,fp8,fp8,0,0.013016000390052795
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,48,1,128,1,float16,fp8,0,0.012535999715328216
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,48,1,128,1,fp8,fp8,0,0.012479999661445617
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,48,2,128,1,float16,float16,0,0.02276480048894882
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,48,2,128,1,float16,fp8,0,0.012515200674533844
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,48,4,128,1,float16,fp8,0,0.012484800070524216
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,48,4,128,1,fp8,fp8,0,0.01252640038728714
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,48,8,128,1,float16,float16,0,0.022750400006771088
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,48,8,128,1,fp8,fp8,0,0.012468799948692322
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,48,8,128,1,float16,fp8,0,0.012555199861526489
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,48,1,128,1,fp8,fp8,0,0.010503999888896942
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,48,48,128,1,float16,float16,0,0.019200000166893005
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,48,48,128,1,float16,fp8,0,0.010396800190210342
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,48,48,128,1,fp8,fp8,0,0.010572800040245056
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,48,1,128,1,float16,float16,0,0.018670399487018586
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,48,4,128,1,fp8,fp8,0,0.010480000078678131
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,48,1,128,1,float16,fp8,0,0.010387200117111205
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,48,8,128,1,float16,fp8,0,0.010601600259542465
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,48,2,128,1,float16,float16,0,0.018695999681949616
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,48,2,128,1,float16,fp8,0,0.010574399679899215
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,48,2,128,1,fp8,fp8,0,0.010398399829864503
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,48,4,128,1,float16,float16,0,0.018785600364208222
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,48,4,128,1,float16,fp8,0,0.01037919968366623
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,48,8,128,1,float16,float16,0,0.018716800212860107
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,48,48,128,1,float16,float16,0,0.018611200153827667
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,48,8,128,1,fp8,fp8,0,0.010264000296592713
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,48,48,128,1,float16,fp8,0,0.010020799934864044
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,48,48,128,1,fp8,fp8,0,0.010406400263309478
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,48,1,128,1,float16,float16,0,0.018612800538539885
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,48,1,128,1,float16,fp8,0,0.009331200271844864
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,48,1,128,1,fp8,fp8,0,0.009028799831867218
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,48,2,128,1,float16,float16,0,0.01664000004529953
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,48,2,128,1,float16,fp8,0,0.010391999781131745
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,48,2,128,1,fp8,fp8,0,0.00843520015478134
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,48,4,128,1,float16,float16,0,0.0186831995844841
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,48,4,128,1,float16,fp8,0,0.008416000008583068
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,48,4,128,1,fp8,fp8,0,0.010393600165843963
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,48,8,128,1,float16,float16,0,0.018615999817848207
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,48,8,128,1,float16,fp8,0,0.009719999879598618
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,48,8,128,1,fp8,fp8,0,0.008444800227880477
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,40,1,128,1,float16,fp8,0,14.436329650878907
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,40,1,128,1,fp8,fp8,0,14.425335693359376
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,40,2,128,1,float16,fp8,0,14.27117919921875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,40,2,128,1,fp8,fp8,0,14.43607177734375
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,40,1,128,1,float16,float16,0,18.168505859375
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,40,2,128,1,float16,float16,0,18.347091674804688
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,40,4,128,1,float16,fp8,0,14.396542358398438
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,40,4,128,1,float16,float16,0,18.66248474121094
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,40,40,128,1,fp8,fp8,0,7.499823760986328
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,40,1,128,1,float16,float16,0,9.419707489013671
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,40,40,128,1,float16,fp8,0,7.531147003173828
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,40,4,128,1,fp8,fp8,0,14.871890258789062
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,40,8,128,1,float16,fp8,0,14.867190551757812
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,40,40,128,1,float16,float16,0,11.817623901367188
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,40,8,128,1,fp8,fp8,0,14.927548217773438
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,40,8,128,1,float16,float16,0,20.065972900390626
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,40,1,128,1,float16,fp8,0,7.207227325439453
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,40,1,128,1,fp8,fp8,0,7.555156707763672
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,40,2,128,1,float16,fp8,0,7.430171203613281
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,40,2,128,1,fp8,fp8,0,7.3686882019042965
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,40,4,128,1,float16,fp8,0,7.420352172851563
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,40,2,128,1,float16,float16,0,9.166214752197266
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,40,4,128,1,float16,float16,0,9.508771514892578
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,40,40,128,1,float16,fp8,0,3.8005840301513674
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,40,4,128,1,fp8,fp8,0,7.399787139892578
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,40,40,128,1,fp8,fp8,0,4.378168106079102
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,40,40,128,1,float16,float16,0,5.952694320678711
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,40,1,128,1,float16,float16,0,4.6964366912841795
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,40,8,128,1,float16,fp8,0,7.308452606201172
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,40,8,128,1,fp8,fp8,0,7.437016296386719
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,40,1,128,1,float16,fp8,0,3.718699264526367
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,40,1,128,1,fp8,fp8,0,3.6821361541748048
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,40,8,128,1,float16,float16,0,9.9799072265625
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,40,2,128,1,float16,float16,0,4.532897567749023
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,40,2,128,1,float16,fp8,0,3.9468128204345705
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,40,4,128,1,float16,fp8,0,3.6913406372070314
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,40,2,128,1,fp8,fp8,0,4.137086486816406
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,40,4,128,1,float16,float16,0,4.6239982604980465
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,40,4,128,1,fp8,fp8,0,3.708241653442383
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,40,8,128,1,float16,fp8,0,3.882233428955078
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,40,40,128,1,float16,float16,0,2.937944030761719
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,40,8,128,1,float16,float16,0,4.878484725952148
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,40,40,128,1,float16,fp8,0,1.9453935623168945
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,40,40,128,1,fp8,fp8,0,2.0053056716918944
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,40,8,128,1,fp8,fp8,0,4.08646240234375
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,40,1,128,1,float16,fp8,0,1.8781232833862305
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,40,1,128,1,float16,float16,0,2.5345455169677735
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,40,1,128,1,fp8,fp8,0,1.8919536590576171
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,40,2,128,1,float16,float16,0,2.211454391479492
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,40,2,128,1,fp8,fp8,0,1.8489551544189453
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,40,2,128,1,float16,fp8,0,2.0457839965820312
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,40,4,128,1,float16,float16,0,2.2557600021362303
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,40,4,128,1,float16,fp8,0,1.8621631622314454
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,40,4,128,1,fp8,fp8,0,1.8561952590942383
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,40,8,128,1,float16,fp8,0,1.8484016418457032
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,40,8,128,1,float16,float16,0,2.4362415313720702
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,40,8,128,1,fp8,fp8,0,2.05731201171875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,40,1,128,1,float16,fp8,0,8.346873474121093
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,40,1,128,1,fp8,fp8,0,8.362351989746093
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,40,2,128,1,float16,fp8,0,8.344999694824219
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,40,2,128,1,fp8,fp8,0,8.384916687011719
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,40,1,128,1,float16,float16,0,10.677670288085938
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,40,4,128,1,float16,fp8,0,8.338267517089843
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,40,2,128,1,float16,float16,0,10.816728210449218
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,40,4,128,1,float16,float16,0,11.011495971679688
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,40,40,128,1,float16,fp8,0,4.411780929565429
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,40,40,128,1,fp8,fp8,0,4.615054321289063
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,40,4,128,1,fp8,fp8,0,8.431368255615235
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,40,1,128,1,float16,float16,0,5.213361740112305
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,40,40,128,1,float16,float16,0,7.517001342773438
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,40,8,128,1,float16,fp8,0,8.562905883789062
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,40,8,128,1,fp8,fp8,0,8.537640380859376
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,40,1,128,1,float16,fp8,0,4.267475128173828
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,40,8,128,1,float16,float16,0,11.519703674316407
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,40,1,128,1,fp8,fp8,0,4.225527954101563
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,40,2,128,1,fp8,fp8,0,4.182870483398437
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,40,2,128,1,float16,float16,0,5.215740966796875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,40,2,128,1,float16,fp8,0,4.580171203613281
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,40,4,128,1,float16,fp8,0,4.256443023681641
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,40,4,128,1,float16,float16,0,5.291088104248047
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,40,4,128,1,fp8,fp8,0,4.213158416748047
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,40,40,128,1,float16,fp8,0,2.218131256103516
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,40,40,128,1,fp8,fp8,0,2.2671152114868165
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,40,8,128,1,float16,fp8,0,4.202848052978515
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,40,1,128,1,float16,fp8,0,2.1178335189819335
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,40,8,128,1,float16,float16,0,5.728073501586914
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,40,8,128,1,fp8,fp8,0,4.198308944702148
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,40,1,128,1,float16,float16,0,2.8854496002197267
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,40,40,128,1,float16,float16,0,3.9975296020507813
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,40,1,128,1,fp8,fp8,0,2.1263471603393556
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,40,2,128,1,float16,fp8,0,2.1556991577148437
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,40,2,128,1,float16,float16,0,2.508923149108887
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,40,2,128,1,fp8,fp8,0,2.1022031784057615
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,40,4,128,1,float16,fp8,0,2.1084991455078126
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,40,8,128,1,float16,fp8,0,2.088694381713867
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,40,4,128,1,float16,float16,0,2.8480255126953127
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,40,4,128,1,fp8,fp8,0,2.6502496719360353
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,40,8,128,1,float16,float16,0,2.678392028808594
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,40,40,128,1,float16,fp8,0,1.162275218963623
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,40,8,128,1,fp8,fp8,0,2.1321104049682615
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,40,40,128,1,float16,float16,0,2.2583616256713865
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,40,40,128,1,fp8,fp8,0,1.1634143829345702
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,40,1,128,1,float16,float16,0,1.3549663543701171
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,40,1,128,1,float16,fp8,0,1.2581104278564452
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,40,1,128,1,fp8,fp8,0,1.223630428314209
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,40,2,128,1,float16,float16,0,1.2710415840148925
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,40,2,128,1,float16,fp8,0,1.104745578765869
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,40,2,128,1,fp8,fp8,0,1.3284751892089843
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,40,4,128,1,float16,float16,0,1.3082703590393066
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,40,4,128,1,float16,fp8,0,1.2626303672790526
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,40,4,128,1,fp8,fp8,0,1.1144975662231444
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,40,8,128,1,fp8,fp8,0,1.0929072380065918
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,40,8,128,1,float16,fp8,0,1.281601619720459
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,40,8,128,1,float16,float16,0,1.3668319702148437
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,40,1,128,1,float16,fp8,0,6.050078582763672
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,40,1,128,1,fp8,fp8,0,5.843308639526367
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,40,2,128,1,float16,fp8,0,5.9273231506347654
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,40,1,128,1,float16,float16,0,7.373687744140625
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,40,2,128,1,fp8,fp8,0,5.870719909667969
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,40,2,128,1,float16,float16,0,7.659979248046875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,40,4,128,1,float16,fp8,0,5.9135185241699215
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,40,4,128,1,float16,float16,0,7.685469055175782
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,40,40,128,1,fp8,fp8,0,3.1279680252075197
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,40,40,128,1,float16,fp8,0,3.3928272247314455
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,40,4,128,1,fp8,fp8,0,6.070636749267578
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,40,40,128,1,float16,float16,0,5.355252838134765
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,40,1,128,1,float16,float16,0,3.651777648925781
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,40,8,128,1,float16,fp8,0,6.021047973632813
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,40,8,128,1,fp8,fp8,0,6.0044609069824215
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,40,8,128,1,float16,float16,0,8.085072326660157
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,40,1,128,1,float16,fp8,0,2.961359977722168
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,40,1,128,1,fp8,fp8,0,2.9697664260864256
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,40,2,128,1,float16,fp8,0,2.9589775085449217
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,40,2,128,1,float16,float16,0,3.7804286956787108
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,40,2,128,1,fp8,fp8,0,3.5298736572265623
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,40,4,128,1,float16,fp8,0,3.0300880432128907
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,40,4,128,1,float16,float16,0,3.725838470458984
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,40,4,128,1,fp8,fp8,0,3.316952133178711
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,40,8,128,1,float16,fp8,0,2.9851072311401365
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,40,40,128,1,float16,fp8,0,1.5939951896667481
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,40,40,128,1,fp8,fp8,0,1.6284608840942383
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,40,8,128,1,float16,float16,0,3.9766830444335937
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,40,1,128,1,float16,float16,0,2.0584127426147463
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,40,8,128,1,fp8,fp8,0,2.983363151550293
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,40,40,128,1,float16,float16,0,3.236771011352539
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,40,1,128,1,float16,fp8,0,1.5293359756469727
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,40,1,128,1,fp8,fp8,0,1.535865592956543
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,40,2,128,1,float16,fp8,0,1.554513645172119
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,40,2,128,1,float16,float16,0,1.779587173461914
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,40,2,128,1,fp8,fp8,0,1.8545808792114258
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,40,4,128,1,float16,fp8,0,1.5710880279541015
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,40,4,128,1,float16,float16,0,1.9070352554321288
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,40,4,128,1,fp8,fp8,0,1.5861040115356446
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,40,8,128,1,float16,fp8,0,1.4933232307434081
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,40,8,128,1,fp8,fp8,0,1.511190414428711
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,40,40,128,1,float16,fp8,0,0.9323184013366699
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,40,8,128,1,float16,float16,0,1.9315376281738281
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,40,40,128,1,fp8,fp8,0,0.8625552177429199
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,40,40,128,1,float16,float16,0,1.5607279777526855
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,40,1,128,1,float16,float16,0,1.0235280036926269
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,40,1,128,1,float16,fp8,0,0.8051199913024902
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,40,1,128,1,fp8,fp8,0,0.7933008193969726
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,40,2,128,1,float16,float16,0,0.9166704177856445
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,40,2,128,1,float16,fp8,0,0.8845024108886719
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,40,2,128,1,fp8,fp8,0,0.9653280258178711
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,40,4,128,1,float16,float16,0,0.9396160125732422
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,40,4,128,1,float16,fp8,0,0.8942159652709961
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,40,4,128,1,fp8,fp8,0,0.9571344375610351
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,40,8,128,1,float16,fp8,0,0.7898447990417481
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,40,8,128,1,float16,float16,0,0.9911888122558594
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,40,8,128,1,fp8,fp8,0,0.8130928039550781
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,40,1,128,1,float16,fp8,0,7.730587005615234
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,40,1,128,1,fp8,fp8,0,7.759004974365235
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,40,2,128,1,fp8,fp8,0,7.898219299316406
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,40,2,128,1,float16,fp8,0,7.8371116638183596
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,40,4,128,1,float16,fp8,0,7.827232360839844
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,40,1,128,1,float16,float16,0,9.827435302734376
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,40,2,128,1,float16,float16,0,9.914060974121094
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,40,4,128,1,float16,float16,0,10.066918182373048
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,40,40,128,1,float16,fp8,0,4.306587219238281
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,40,40,128,1,fp8,fp8,0,4.356740951538086
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,40,1,128,1,float16,float16,0,4.8077552795410154
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,40,4,128,1,fp8,fp8,0,7.9031730651855465
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,40,40,128,1,float16,float16,0,7.650498962402343
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,40,8,128,1,float16,fp8,0,7.9272705078125
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,40,8,128,1,fp8,fp8,0,7.914892578125
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,40,1,128,1,float16,fp8,0,3.9410846710205076
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,40,8,128,1,float16,float16,0,10.738307189941406
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,40,1,128,1,fp8,fp8,0,3.9149967193603517
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,40,2,128,1,float16,fp8,0,3.933380889892578
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,40,2,128,1,fp8,fp8,0,3.9330207824707033
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,40,2,128,1,float16,float16,0,5.092087936401367
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,40,4,128,1,float16,fp8,0,3.963694381713867
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,40,4,128,1,float16,float16,0,5.139891052246094
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,40,4,128,1,fp8,fp8,0,3.9291217803955076
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,40,8,128,1,float16,fp8,0,3.9377983093261717
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,40,40,128,1,fp8,fp8,0,2.1436111450195314
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,40,40,128,1,float16,fp8,0,2.565166473388672
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,40,8,128,1,fp8,fp8,0,3.9878047943115233
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,40,8,128,1,float16,float16,0,5.415660858154297
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,40,40,128,1,float16,float16,0,3.7930721282958983
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,40,1,128,1,float16,float16,0,2.3667055130004884
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,40,1,128,1,float16,fp8,0,1.9682815551757813
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,40,1,128,1,fp8,fp8,0,1.9590112686157226
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,40,2,128,1,float16,float16,0,2.2822351455688477
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,40,2,128,1,float16,fp8,0,1.9856767654418945
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,40,2,128,1,fp8,fp8,0,2.376020812988281
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,40,4,128,1,float16,float16,0,2.475192070007324
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,40,4,128,1,fp8,fp8,0,1.9451616287231446
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,40,4,128,1,float16,fp8,0,2.5499391555786133
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,40,8,128,1,float16,float16,0,2.668230438232422
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,40,8,128,1,fp8,fp8,0,1.961212730407715
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,40,40,128,1,fp8,fp8,0,1.0879695892333985
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,40,8,128,1,float16,fp8,0,2.4449520111083984
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,40,40,128,1,float16,fp8,0,1.3419695854187013
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,40,40,128,1,float16,float16,0,2.017292785644531
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,40,1,128,1,float16,float16,0,1.3393759727478027
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,40,1,128,1,float16,fp8,0,1.2067520141601562
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,40,1,128,1,fp8,fp8,0,1.0000432014465332
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,40,2,128,1,float16,float16,0,1.1870896339416503
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,40,2,128,1,float16,fp8,0,1.0003135681152344
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,40,2,128,1,fp8,fp8,0,1.0222415924072266
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,40,4,128,1,float16,fp8,0,1.0069215774536133
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,40,4,128,1,float16,float16,0,1.2578800201416016
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,40,4,128,1,fp8,fp8,0,1.2736623764038086
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,40,8,128,1,float16,fp8,0,1.0003984451293946
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,40,8,128,1,float16,float16,0,1.3116095542907715
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,40,40,128,1,float16,fp8,0,0.5736480236053467
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,40,40,128,1,fp8,fp8,0,0.5751503944396973
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,40,40,128,1,float16,float16,0,0.9915408134460449
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,40,1,128,1,float16,float16,0,0.638318395614624
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,40,8,128,1,fp8,fp8,0,1.1352031707763672
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,40,1,128,1,float16,fp8,0,0.6828608036041259
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,40,1,128,1,fp8,fp8,0,0.5347504138946533
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,40,2,128,1,float16,float16,0,0.6524687767028808
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,40,2,128,1,float16,fp8,0,0.5373184204101562
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,40,4,128,1,float16,fp8,0,0.600705623626709
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,40,2,128,1,fp8,fp8,0,0.536030387878418
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,40,4,128,1,float16,float16,0,0.6364287853240966
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,40,4,128,1,fp8,fp8,0,0.5612639904022216
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,40,8,128,1,float16,fp8,0,0.5350255966186523
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,40,8,128,1,float16,float16,0,0.8005151748657227
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,40,8,128,1,fp8,fp8,0,0.6068352222442627
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,40,1,128,1,float16,fp8,0,4.502937698364258
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,40,1,128,1,fp8,fp8,0,4.491486358642578
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,40,2,128,1,float16,fp8,0,4.488248062133789
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,40,2,128,1,fp8,fp8,0,4.537574386596679
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,40,1,128,1,float16,float16,0,5.628684616088867
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,40,4,128,1,float16,fp8,0,4.5418750762939455
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,40,2,128,1,float16,float16,0,5.70555534362793
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,40,4,128,1,float16,float16,0,5.888119888305664
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,40,40,128,1,float16,fp8,0,2.5037744522094725
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,40,40,128,1,fp8,fp8,0,2.860625648498535
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,40,4,128,1,fp8,fp8,0,4.5122528076171875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,40,1,128,1,float16,float16,0,2.624265670776367
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,40,8,128,1,float16,fp8,0,4.619651031494141
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,40,8,128,1,fp8,fp8,0,4.570259094238281
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,40,1,128,1,float16,fp8,0,2.2610912322998047
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,40,40,128,1,float16,float16,0,5.186067199707031
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,40,8,128,1,float16,float16,0,6.339316940307617
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,40,1,128,1,fp8,fp8,0,2.2624319076538084
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,40,2,128,1,float16,float16,0,2.7007360458374023
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,40,2,128,1,float16,fp8,0,2.673068809509277
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,40,2,128,1,fp8,fp8,0,2.300326347351074
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,40,4,128,1,float16,float16,0,2.839859199523926
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,40,4,128,1,float16,fp8,0,2.2997039794921874
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,40,4,128,1,fp8,fp8,0,2.2485103607177734
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,40,8,128,1,float16,fp8,0,2.284744071960449
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,40,40,128,1,float16,fp8,0,1.2684656143188477
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,40,8,128,1,float16,float16,0,3.132521629333496
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,40,40,128,1,fp8,fp8,0,1.488468837738037
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,40,1,128,1,float16,float16,0,1.322862434387207
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,40,1,128,1,float16,fp8,0,1.1831775665283204
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,40,40,128,1,float16,float16,0,2.4903024673461913
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,40,8,128,1,fp8,fp8,0,2.686599922180176
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,40,1,128,1,fp8,fp8,0,1.1632623672485352
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,40,2,128,1,float16,float16,0,1.3581680297851562
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,40,2,128,1,float16,fp8,0,1.3068991661071778
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,40,2,128,1,fp8,fp8,0,1.270467185974121
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,40,4,128,1,float16,fp8,0,1.1630160331726074
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,40,4,128,1,fp8,fp8,0,1.19760160446167
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,40,4,128,1,float16,float16,0,1.4460800170898438
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,40,8,128,1,float16,fp8,0,1.1547951698303223
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,40,8,128,1,float16,float16,0,1.5435520172119142
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,40,8,128,1,fp8,fp8,0,1.2793295860290528
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,40,40,128,1,float16,fp8,0,0.7016863822937012
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,40,40,128,1,fp8,fp8,0,0.6594223976135254
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,40,1,128,1,float16,float16,0,0.6920896053314209
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,40,1,128,1,fp8,fp8,0,0.6048096179962158
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,40,40,128,1,float16,float16,0,1.2695823669433595
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,40,1,128,1,float16,fp8,0,0.7340688228607177
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,40,2,128,1,float16,float16,0,0.7156015872955322
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,40,2,128,1,float16,fp8,0,0.6587808132171631
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,40,2,128,1,fp8,fp8,0,0.6046751976013184
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,40,4,128,1,float16,float16,0,0.7319119930267334
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,40,4,128,1,float16,fp8,0,0.622441577911377
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,40,4,128,1,fp8,fp8,0,0.6048863887786865
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,40,8,128,1,float16,float16,0,0.789625597000122
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,40,8,128,1,float16,fp8,0,0.7106448173522949
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,40,8,128,1,fp8,fp8,0,0.7143856048583984
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,40,40,128,1,float16,fp8,0,0.36014399528503416
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,40,40,128,1,float16,float16,0,0.6953264236450195
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,40,40,128,1,fp8,fp8,0,0.36495840549468994
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,40,1,128,1,float16,float16,0,0.3733247995376587
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,40,1,128,1,float16,fp8,0,0.36411199569702146
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,40,1,128,1,fp8,fp8,0,0.33200318813323976
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,40,2,128,1,float16,float16,0,0.42378878593444824
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,40,2,128,1,float16,fp8,0,0.3310767889022827
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,40,2,128,1,fp8,fp8,0,0.33951520919799805
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,40,4,128,1,float16,float16,0,0.39556159973144533
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,40,4,128,1,float16,fp8,0,0.33022561073303225
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,40,4,128,1,fp8,fp8,0,0.3510207891464233
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,40,8,128,1,float16,float16,0,0.4217360019683838
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,40,8,128,1,float16,fp8,0,0.3810111999511719
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,40,8,128,1,fp8,fp8,0,0.33173279762268065
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,40,1,128,1,float16,fp8,0,4.324169540405274
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,40,1,128,1,fp8,fp8,0,4.330404663085938
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,40,2,128,1,float16,fp8,0,4.336252975463867
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,40,1,128,1,float16,float16,0,5.315969467163086
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,40,2,128,1,fp8,fp8,0,4.328936004638672
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,40,4,128,1,float16,fp8,0,4.37344970703125
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,40,2,128,1,float16,float16,0,5.4616752624511715
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,40,4,128,1,float16,float16,0,5.7741950988769535
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,40,40,128,1,float16,fp8,0,2.5149391174316404
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,40,40,128,1,fp8,fp8,0,2.6142879486083985
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,40,4,128,1,fp8,fp8,0,4.373467254638672
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,40,1,128,1,float16,float16,0,2.5124624252319334
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,40,8,128,1,float16,fp8,0,4.347761535644532
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,40,8,128,1,fp8,fp8,0,4.544019317626953
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,40,40,128,1,float16,float16,0,5.529273605346679
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,40,1,128,1,float16,fp8,0,2.1901983261108398
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,40,8,128,1,float16,float16,0,6.316300964355468
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,40,1,128,1,fp8,fp8,0,2.1761568069458006
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,40,2,128,1,float16,float16,0,2.661511993408203
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,40,2,128,1,float16,fp8,0,2.4518991470336915
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,40,2,128,1,fp8,fp8,0,2.197867202758789
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,40,4,128,1,float16,float16,0,2.7952144622802733
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,40,4,128,1,float16,fp8,0,2.1682191848754884
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,40,4,128,1,fp8,fp8,0,2.1735904693603514
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,40,40,128,1,float16,fp8,0,1.247811222076416
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,40,8,128,1,float16,fp8,0,2.181305694580078
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,40,8,128,1,float16,float16,0,3.0662864685058593
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,40,8,128,1,fp8,fp8,0,2.7117263793945314
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,40,1,128,1,float16,float16,0,1.257913589477539
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,40,40,128,1,fp8,fp8,0,1.4575615882873536
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,40,1,128,1,float16,fp8,0,1.1075360298156738
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,40,1,128,1,fp8,fp8,0,1.1058496475219726
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,40,40,128,1,float16,float16,0,2.83196964263916
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,40,2,128,1,float16,float16,0,1.2897968292236328
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,40,2,128,1,fp8,fp8,0,1.175163173675537
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,40,2,128,1,float16,fp8,0,1.2673376083374024
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,40,4,128,1,float16,float16,0,1.4045104026794433
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,40,4,128,1,float16,fp8,0,1.1755151748657227
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,40,4,128,1,fp8,fp8,0,1.1046624183654785
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,40,8,128,1,float16,fp8,0,1.1046879768371582
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,40,8,128,1,float16,float16,0,1.5314559936523438
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,40,8,128,1,fp8,fp8,0,1.3189519882202148
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,40,40,128,1,float16,fp8,0,0.6879631996154785
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,40,40,128,1,fp8,fp8,0,0.7244351863861084
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,40,1,128,1,float16,float16,0,0.6498655796051025
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,40,1,128,1,float16,fp8,0,0.5732992172241211
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,40,1,128,1,fp8,fp8,0,0.5732607841491699
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,40,40,128,1,float16,float16,0,1.4160207748413085
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,40,2,128,1,float16,float16,0,0.6751039981842041
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,40,2,128,1,float16,fp8,0,0.6237616062164306
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,40,2,128,1,fp8,fp8,0,0.5902527809143067
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,40,4,128,1,float16,float16,0,0.7176208019256591
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,40,4,128,1,float16,fp8,0,0.6019680023193359
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,40,8,128,1,float16,fp8,0,0.5734079837799072
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,40,8,128,1,float16,float16,0,0.7850800037384034
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,40,4,128,1,fp8,fp8,0,0.5730224132537842
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,40,8,128,1,fp8,fp8,0,0.6392288208007812
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,40,40,128,1,float16,fp8,0,0.34621760845184324
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,40,40,128,1,fp8,fp8,0,0.3524496078491211
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,40,40,128,1,float16,float16,0,0.7427279949188232
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,40,2,128,1,float16,fp8,0,0.3242016077041626
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,40,1,128,1,float16,float16,0,0.3664160013198853
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,40,1,128,1,float16,fp8,0,0.3071392059326172
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,40,4,128,1,float16,float16,0,0.3746112108230591
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,40,1,128,1,fp8,fp8,0,0.30882399082183837
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,40,2,128,1,float16,float16,0,0.3533440113067627
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,40,2,128,1,fp8,fp8,0,0.3074687957763672
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,40,4,128,1,float16,fp8,0,0.3089823961257935
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,40,4,128,1,fp8,fp8,0,0.3248016119003296
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,40,8,128,1,float16,float16,0,0.41871042251586915
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,40,8,128,1,float16,fp8,0,0.3082159996032715
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,40,8,128,1,fp8,fp8,0,0.30799360275268556
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,40,40,128,1,float16,fp8,0,0.19364320039749144
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,40,40,128,1,float16,float16,0,0.39168319702148435
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,40,40,128,1,fp8,fp8,0,0.19477759599685668
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,40,1,128,1,float16,float16,0,0.1922368049621582
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,40,1,128,1,float16,fp8,0,0.17454559803009034
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,40,1,128,1,fp8,fp8,0,0.17342400550842285
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,40,2,128,1,float16,float16,0,0.19827840328216553
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,40,2,128,1,float16,fp8,0,0.17484159469604493
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,40,2,128,1,fp8,fp8,0,0.17429280281066895
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,40,4,128,1,float16,float16,0,0.2125727891921997
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,40,4,128,1,float16,fp8,0,0.17450720071792603
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,40,4,128,1,fp8,fp8,0,0.17409600019454957
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,40,8,128,1,float16,float16,0,0.2326751947402954
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,40,8,128,1,float16,fp8,0,0.17362879514694213
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,40,8,128,1,fp8,fp8,0,0.17451679706573486
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,40,1,128,1,float16,fp8,0,2.617411231994629
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,40,1,128,1,fp8,fp8,0,2.622675132751465
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,40,1,128,1,float16,float16,0,3.1245344161987303
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,40,2,128,1,float16,fp8,0,2.6228479385375976
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,40,2,128,1,fp8,fp8,0,2.6196399688720704
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,40,2,128,1,float16,float16,0,3.205086517333984
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,40,4,128,1,float16,float16,0,3.488391876220703
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,40,4,128,1,float16,fp8,0,2.6258655548095704
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,40,40,128,1,float16,fp8,0,1.7638864517211914
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,40,4,128,1,fp8,fp8,0,2.6245759963989257
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,40,8,128,1,fp8,fp8,0,2.622140884399414
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,40,8,128,1,float16,fp8,0,2.987286376953125
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,40,40,128,1,fp8,fp8,0,1.7191375732421874
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,40,8,128,1,float16,float16,0,3.8972816467285156
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,40,40,128,1,float16,float16,0,3.7865135192871096
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,40,1,128,1,float16,float16,0,1.5013983726501465
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,40,1,128,1,float16,fp8,0,1.387299156188965
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,40,1,128,1,fp8,fp8,0,1.3366864204406739
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,40,2,128,1,float16,float16,0,1.5699472427368164
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,40,2,128,1,float16,fp8,0,1.3571887969970704
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,40,2,128,1,fp8,fp8,0,1.3628496170043944
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,40,4,128,1,float16,float16,0,1.7556095123291016
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,40,4,128,1,float16,fp8,0,1.325872039794922
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,40,4,128,1,fp8,fp8,0,1.3824432373046875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,40,8,128,1,float16,fp8,0,1.3279199600219727
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,40,40,128,1,float16,fp8,0,0.7893599987030029
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,40,8,128,1,float16,float16,0,1.9333984375
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,40,8,128,1,fp8,fp8,0,1.5042896270751953
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,40,1,128,1,float16,float16,0,0.7639887809753418
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,40,40,128,1,fp8,fp8,0,0.9056015968322754
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,40,1,128,1,float16,fp8,0,0.6880144119262696
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,40,40,128,1,float16,float16,0,1.9122896194458008
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,40,1,128,1,fp8,fp8,0,0.6797200202941894
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,40,2,128,1,float16,float16,0,0.7928127765655517
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,40,2,128,1,float16,fp8,0,0.7328383922576904
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,40,2,128,1,fp8,fp8,0,0.7309872150421143
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,40,4,128,1,float16,float16,0,0.8516863822937012
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,40,4,128,1,float16,fp8,0,0.7063295841217041
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,40,4,128,1,fp8,fp8,0,0.6814527988433838
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,40,8,128,1,float16,fp8,0,0.6814080238342285
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,40,8,128,1,float16,float16,0,0.9714672088623046
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,40,8,128,1,fp8,fp8,0,0.6805952072143555
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,40,40,128,1,float16,fp8,0,0.41933441162109375
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,40,40,128,1,fp8,fp8,0,0.41236162185668945
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,40,1,128,1,float16,float16,0,0.4047552108764648
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,40,40,128,1,float16,float16,0,1.0144944190979004
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,40,1,128,1,float16,fp8,0,0.3578255891799927
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,40,1,128,1,fp8,fp8,0,0.3565583944320679
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,40,2,128,1,float16,float16,0,0.4140639781951904
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,40,2,128,1,float16,fp8,0,0.3567568063735962
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,40,2,128,1,fp8,fp8,0,0.35680160522460935
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,40,4,128,1,float16,float16,0,0.44525599479675293
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,40,4,128,1,float16,fp8,0,0.3575200080871582
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,40,4,128,1,fp8,fp8,0,0.35767359733581544
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,40,8,128,1,float16,float16,0,0.5038127899169922
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,40,8,128,1,float16,fp8,0,0.35721919536590574
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,40,8,128,1,fp8,fp8,0,0.3570672035217285
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,40,40,128,1,float16,fp8,0,0.22320480346679689
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,40,40,128,1,float16,float16,0,0.5102352142333985
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,40,40,128,1,fp8,fp8,0,0.22400479316711425
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,40,1,128,1,float16,float16,0,0.2244191884994507
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,40,1,128,1,float16,fp8,0,0.19467200040817262
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,40,1,128,1,fp8,fp8,0,0.1966976046562195
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,40,2,128,1,float16,float16,0,0.23005599975585939
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,40,2,128,1,float16,fp8,0,0.19508960247039794
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,40,2,128,1,fp8,fp8,0,0.19492640495300292
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,40,4,128,1,float16,float16,0,0.24418239593505858
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,40,4,128,1,float16,fp8,0,0.19549280405044556
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,40,4,128,1,fp8,fp8,0,0.19516960382461548
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,40,8,128,1,float16,float16,0,0.2714240074157715
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,40,8,128,1,float16,fp8,0,0.19736800193786622
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,40,8,128,1,fp8,fp8,0,0.19600800275802613
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,40,40,128,1,float16,float16,0,0.2773504018783569
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,40,40,128,1,float16,fp8,0,0.12893279790878295
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,40,40,128,1,fp8,fp8,0,0.12921600341796874
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,40,1,128,1,float16,float16,0,0.12912319898605346
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,40,1,128,1,float16,fp8,0,0.11505919694900513
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,40,1,128,1,fp8,fp8,0,0.11407680511474609
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,40,2,128,1,float16,float16,0,0.13498719930648803
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,40,2,128,1,float16,fp8,0,0.11533119678497314
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,40,2,128,1,fp8,fp8,0,0.11468000411987304
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,40,4,128,1,float16,fp8,0,0.11441600322723389
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,40,4,128,1,fp8,fp8,0,0.11409280300140381
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,40,4,128,1,float16,float16,0,0.13946880102157594
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,40,8,128,1,float16,float16,0,0.14983999729156494
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,40,8,128,1,float16,fp8,0,0.1145792007446289
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,40,8,128,1,fp8,fp8,0,0.11587040424346924
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,40,1,128,1,float16,fp8,0,2.665774345397949
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,40,1,128,1,fp8,fp8,0,2.6636463165283204
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,40,1,128,1,float16,float16,0,3.0818031311035154
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,40,2,128,1,float16,fp8,0,2.668609619140625
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,40,2,128,1,fp8,fp8,0,2.668275260925293
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,40,2,128,1,float16,float16,0,3.1592079162597657
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,40,4,128,1,float16,fp8,0,2.6615840911865236
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,40,4,128,1,float16,float16,0,3.541657638549805
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,40,4,128,1,fp8,fp8,0,2.6654096603393556
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,40,40,128,1,float16,fp8,0,1.6236896514892578
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,40,8,128,1,float16,fp8,0,2.811564826965332
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,40,8,128,1,fp8,fp8,0,2.6682559967041017
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,40,40,128,1,fp8,fp8,0,1.7186767578125
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,40,8,128,1,float16,float16,0,4.153332901000977
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,40,1,128,1,float16,float16,0,1.5028767585754395
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,40,1,128,1,float16,fp8,0,1.344204807281494
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,40,1,128,1,fp8,fp8,0,1.352468776702881
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,40,2,128,1,float16,fp8,0,1.3529040336608886
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,40,2,128,1,float16,float16,0,1.589742374420166
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,40,40,128,1,float16,float16,0,4.5882720947265625
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,40,2,128,1,fp8,fp8,0,1.3880576133728026
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,40,4,128,1,float16,fp8,0,1.3439616203308105
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,40,4,128,1,fp8,fp8,0,1.3455151557922362
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,40,4,128,1,float16,float16,0,1.9236223220825195
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,40,8,128,1,float16,fp8,0,1.5431504249572754
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,40,40,128,1,float16,fp8,0,0.8258912086486816
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,40,8,128,1,fp8,fp8,0,1.3450639724731446
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,40,8,128,1,float16,float16,0,2.0573551177978517
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,40,40,128,1,fp8,fp8,0,0.8251999855041504
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,40,1,128,1,float16,float16,0,0.7949935913085937
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,40,1,128,1,float16,fp8,0,0.6861296176910401
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,40,1,128,1,fp8,fp8,0,0.7087679862976074
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,40,40,128,1,float16,float16,0,2.2963247299194336
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,40,2,128,1,float16,float16,0,0.8005791664123535
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,40,2,128,1,fp8,fp8,0,0.6993616104125977
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,40,2,128,1,float16,fp8,0,0.685430383682251
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,40,4,128,1,float16,float16,0,0.882532787322998
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,40,4,128,1,fp8,fp8,0,0.6869791984558106
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,40,4,128,1,float16,fp8,0,0.7509007930755616
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,40,8,128,1,float16,fp8,0,0.6863952159881592
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,40,40,128,1,float16,fp8,0,0.425596809387207
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,40,8,128,1,fp8,fp8,0,0.7504047870635986
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,40,8,128,1,float16,float16,0,1.0380975723266601
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,40,40,128,1,fp8,fp8,0,0.42657761573791503
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,40,1,128,1,float16,float16,0,0.39385759830474854
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,40,1,128,1,float16,fp8,0,0.3621023893356323
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,40,1,128,1,fp8,fp8,0,0.35627040863037107
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,40,40,128,1,float16,float16,0,1.1658415794372559
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,40,4,128,1,float16,float16,0,0.45537281036376953
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,40,2,128,1,float16,float16,0,0.4137584209442139
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,40,2,128,1,float16,fp8,0,0.3779871940612793
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,40,2,128,1,fp8,fp8,0,0.35556640625
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,40,4,128,1,float16,fp8,0,0.3561552047729492
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,40,4,128,1,fp8,fp8,0,0.35651359558105467
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,40,8,128,1,float16,fp8,0,0.35573759078979494
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,40,40,128,1,float16,float16,0,0.5980639934539795
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,40,8,128,1,float16,float16,0,0.5335807800292969
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,40,8,128,1,fp8,fp8,0,0.35678720474243164
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,40,40,128,1,float16,fp8,0,0.22989120483398437
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,40,40,128,1,fp8,fp8,0,0.22689759731292725
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,40,1,128,1,float16,float16,0,0.2176815986633301
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,40,1,128,1,float16,fp8,0,0.1916607975959778
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,40,1,128,1,fp8,fp8,0,0.1909327983856201
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,40,2,128,1,float16,float16,0,0.22584640979766846
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,40,2,128,1,float16,fp8,0,0.19177600145339965
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,40,2,128,1,fp8,fp8,0,0.19152640104293822
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,40,4,128,1,float16,float16,0,0.24765920639038086
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,40,4,128,1,float16,fp8,0,0.19160319566726686
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,40,4,128,1,fp8,fp8,0,0.19168800115585327
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,40,8,128,1,float16,float16,0,0.2852047920227051
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,40,8,128,1,float16,fp8,0,0.19145599603652955
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,40,8,128,1,fp8,fp8,0,0.19200479984283447
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,40,40,128,1,float16,float16,0,0.31911840438842776
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,40,40,128,1,float16,fp8,0,0.12732160091400146
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,40,40,128,1,fp8,fp8,0,0.12856160402297973
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,40,1,128,1,float16,float16,0,0.12302720546722412
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,40,1,128,1,float16,fp8,0,0.10834879875183105
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,40,1,128,1,fp8,fp8,0,0.10837119817733765
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,40,4,128,1,fp8,fp8,0,0.10857280492782592
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,40,2,128,1,float16,float16,0,0.1313920021057129
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,40,2,128,1,float16,fp8,0,0.10765119791030883
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,40,2,128,1,fp8,fp8,0,0.10815680027008057
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,40,4,128,1,float16,float16,0,0.13653119802474975
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,40,4,128,1,float16,fp8,0,0.10981600284576416
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,40,8,128,1,float16,float16,0,0.16356320381164552
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,40,8,128,1,float16,fp8,0,0.10883040428161621
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,40,8,128,1,fp8,fp8,0,0.10877280235290528
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,40,40,128,1,float16,float16,0,0.1782863974571228
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,40,40,128,1,float16,fp8,0,0.07613440155982971
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,40,2,128,1,float16,fp8,0,0.0668175995349884
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,40,40,128,1,fp8,fp8,0,0.07502239942550659
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,40,1,128,1,float16,float16,0,0.08007519841194152
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,40,1,128,1,float16,fp8,0,0.06704959869384766
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,40,1,128,1,fp8,fp8,0,0.06674879789352417
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,40,2,128,1,float16,float16,0,0.08020319938659667
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,40,2,128,1,fp8,fp8,0,0.06655840277671814
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,40,4,128,1,float16,float16,0,0.08449919819831848
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,40,4,128,1,float16,fp8,0,0.06704000234603882
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,40,4,128,1,fp8,fp8,0,0.06667839884757995
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,40,8,128,1,float16,float16,0,0.09082559943199157
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,40,8,128,1,float16,fp8,0,0.06712480187416077
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,40,8,128,1,fp8,fp8,0,0.06654559969902038
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,40,1,128,1,float16,float16,0,1.8843599319458009
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,40,1,128,1,float16,fp8,0,1.6975679397583008
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,40,1,128,1,fp8,fp8,0,1.6961936950683594
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,40,2,128,1,float16,fp8,0,1.6950927734375
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,40,2,128,1,float16,float16,0,1.9655439376831054
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,40,2,128,1,fp8,fp8,0,1.6965551376342773
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,40,4,128,1,float16,fp8,0,1.6984928131103516
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,40,4,128,1,float16,float16,0,2.2371856689453127
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,40,4,128,1,fp8,fp8,0,1.8112703323364259
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,40,8,128,1,float16,fp8,0,1.6956287384033204
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,40,40,128,1,float16,fp8,0,1.0707759857177734
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,40,40,128,1,fp8,fp8,0,1.1555567741394044
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,40,8,128,1,fp8,fp8,0,1.8354543685913085
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,40,8,128,1,float16,float16,0,2.702387237548828
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,40,1,128,1,float16,float16,0,0.9528960227966309
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,40,1,128,1,float16,fp8,0,0.8588447570800781
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,40,1,128,1,fp8,fp8,0,0.8606032371520996
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,40,2,128,1,float16,fp8,0,0.8829808235168457
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,40,2,128,1,float16,float16,0,1.0106255531311035
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,40,40,128,1,float16,float16,0,3.232823944091797
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,40,2,128,1,fp8,fp8,0,0.8842991828918457
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,40,4,128,1,float16,fp8,0,0.8602335929870606
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,40,4,128,1,float16,float16,0,1.124071979522705
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,40,4,128,1,fp8,fp8,0,0.8608912467956543
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,40,8,128,1,float16,fp8,0,0.8970895767211914
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,40,40,128,1,float16,fp8,0,0.5463007926940918
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,40,40,128,1,fp8,fp8,0,0.5531856060028076
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,40,8,128,1,fp8,fp8,0,0.8593600273132325
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,40,1,128,1,float16,float16,0,0.4856575965881348
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,40,8,128,1,float16,float16,0,1.362713623046875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,40,1,128,1,float16,fp8,0,0.4529151916503906
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,40,1,128,1,fp8,fp8,0,0.44048318862915037
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,40,2,128,1,float16,float16,0,0.5127120018005371
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,40,40,128,1,float16,float16,0,1.6379007339477538
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,40,2,128,1,float16,fp8,0,0.4449808120727539
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,40,2,128,1,fp8,fp8,0,0.44031357765197754
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,40,4,128,1,float16,float16,0,0.5730000019073487
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,40,4,128,1,float16,fp8,0,0.4410543918609619
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,40,4,128,1,fp8,fp8,0,0.4422624111175537
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,40,8,128,1,float16,fp8,0,0.44080801010131837
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,40,8,128,1,float16,float16,0,0.6934175968170166
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,40,8,128,1,fp8,fp8,0,0.44109439849853516
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,40,40,128,1,float16,fp8,0,0.28412959575653074
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,40,40,128,1,fp8,fp8,0,0.28543839454650877
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,40,1,128,1,float16,float16,0,0.26158239841461184
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,40,2,128,1,fp8,fp8,0,0.23198881149291992
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,40,40,128,1,float16,float16,0,0.8348719596862793
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,40,1,128,1,float16,fp8,0,0.2333359956741333
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,40,1,128,1,fp8,fp8,0,0.23175199031829835
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,40,2,128,1,float16,float16,0,0.2739311933517456
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,40,2,128,1,float16,fp8,0,0.23119680881500243
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,40,4,128,1,float16,float16,0,0.3048880100250244
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,40,4,128,1,float16,fp8,0,0.23186719417572021
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,40,4,128,1,fp8,fp8,0,0.2327183961868286
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,40,8,128,1,float16,float16,0,0.3636447906494141
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,40,8,128,1,float16,fp8,0,0.23355679512023925
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,40,8,128,1,fp8,fp8,0,0.23185598850250244
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,40,40,128,1,float16,fp8,0,0.15410239696502687
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,40,40,128,1,float16,float16,0,0.43378238677978515
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,40,40,128,1,fp8,fp8,0,0.15473439693450927
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,40,1,128,1,float16,float16,0,0.15226880311965943
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,40,1,128,1,float16,fp8,0,0.12705600261688232
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,40,1,128,1,fp8,fp8,0,0.1265504002571106
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,40,2,128,1,float16,float16,0,0.16035200357437135
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,40,2,128,1,float16,fp8,0,0.12612799406051636
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,40,2,128,1,fp8,fp8,0,0.1268831968307495
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,40,4,128,1,float16,float16,0,0.17180639505386353
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,40,4,128,1,float16,fp8,0,0.12768800258636476
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,40,4,128,1,fp8,fp8,0,0.12707200050354003
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,40,8,128,1,float16,float16,0,0.20040318965911866
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,40,8,128,1,float16,fp8,0,0.12821120023727417
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,40,8,128,1,fp8,fp8,0,0.12852959632873534
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,40,40,128,1,float16,float16,0,0.23385279178619384
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,40,40,128,1,float16,fp8,0,0.088510400056839
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,40,40,128,1,fp8,fp8,0,0.08865919709205627
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,40,1,128,1,float16,float16,0,0.08715519905090333
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,40,1,128,1,float16,fp8,0,0.07444639801979065
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,40,1,128,1,fp8,fp8,0,0.07483360171318054
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,40,2,128,1,float16,float16,0,0.090447998046875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,40,2,128,1,float16,fp8,0,0.07463359832763672
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,40,2,128,1,fp8,fp8,0,0.0750432014465332
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,40,4,128,1,float16,float16,0,0.0977616012096405
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,40,40,128,1,float16,float16,0,0.1265104055404663
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,40,4,128,1,float16,fp8,0,0.07459359765052795
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,40,4,128,1,fp8,fp8,0,0.07466400265693665
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,40,8,128,1,float16,float16,0,0.10848959684371948
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,40,8,128,1,float16,fp8,0,0.07450399994850158
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,40,8,128,1,fp8,fp8,0,0.07470080256462097
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,40,40,128,1,float16,fp8,0,0.05552319884300232
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,40,40,128,1,fp8,fp8,0,0.05545920133590698
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,40,1,128,1,float16,float16,0,0.061515200138092044
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,40,1,128,1,float16,fp8,0,0.04875519871711731
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,40,1,128,1,fp8,fp8,0,0.04847359955310822
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,40,2,128,1,float16,float16,0,0.06170719861984253
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,40,2,128,1,float16,fp8,0,0.04941920042037964
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,40,2,128,1,fp8,fp8,0,0.049235200881958006
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,40,4,128,1,float16,float16,0,0.06507999897003174
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,40,4,128,1,float16,fp8,0,0.049318400025367734
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,40,4,128,1,fp8,fp8,0,0.04919840097427368
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,40,8,128,1,float16,float16,0,0.07018880248069763
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,40,8,128,1,float16,fp8,0,0.04870559871196747
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,40,8,128,1,fp8,fp8,0,0.04902400076389313
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,40,1,128,1,float16,float16,0,1.9891311645507812
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,40,1,128,1,float16,fp8,0,1.8442447662353516
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,40,1,128,1,fp8,fp8,0,1.8417280197143555
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,40,2,128,1,float16,fp8,0,1.8421232223510742
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,40,2,128,1,float16,float16,0,2.1324687957763673
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,40,2,128,1,fp8,fp8,0,1.8422735214233399
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,40,4,128,1,float16,fp8,0,1.8441999435424805
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,40,4,128,1,float16,float16,0,2.5155487060546875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,40,4,128,1,fp8,fp8,0,1.84324951171875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,40,40,128,1,float16,fp8,0,1.209660816192627
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,40,8,128,1,float16,fp8,0,1.840278434753418
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,40,8,128,1,fp8,fp8,0,1.9447103500366212
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,40,40,128,1,fp8,fp8,0,1.2488800048828126
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,40,1,128,1,float16,float16,0,1.0038384437561034
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,40,8,128,1,float16,float16,0,3.1125776290893556
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,40,1,128,1,float16,fp8,0,0.931719970703125
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,40,1,128,1,fp8,fp8,0,0.930735969543457
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,40,2,128,1,float16,float16,0,1.0879440307617188
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,40,2,128,1,float16,fp8,0,0.9398143768310547
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,40,2,128,1,fp8,fp8,0,0.9646400451660156
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,40,40,128,1,float16,float16,0,4.045459365844726
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,40,4,128,1,float16,fp8,0,0.9312656402587891
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,40,4,128,1,float16,float16,0,1.2477408409118653
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,40,4,128,1,fp8,fp8,0,0.9298543930053711
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,40,8,128,1,float16,fp8,0,0.9316399574279786
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,40,40,128,1,float16,fp8,0,0.6151999950408935
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,40,1,128,1,float16,float16,0,0.5245168209075928
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,40,40,128,1,fp8,fp8,0,0.6138175964355469
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,40,1,128,1,float16,fp8,0,0.47480320930480957
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,40,8,128,1,float16,float16,0,1.5919504165649414
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,40,8,128,1,fp8,fp8,0,0.9358783721923828
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,40,1,128,1,fp8,fp8,0,0.47501277923583984
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,40,2,128,1,float16,float16,0,0.5577119827270508
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,40,2,128,1,float16,fp8,0,0.47542719841003417
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,40,2,128,1,fp8,fp8,0,0.47449917793273927
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,40,40,128,1,float16,float16,0,2.0386016845703123
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,40,4,128,1,fp8,fp8,0,0.4750864028930664
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,40,4,128,1,float16,float16,0,0.6371088027954102
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,40,4,128,1,float16,fp8,0,0.49334077835083007
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,40,8,128,1,float16,fp8,0,0.47458882331848146
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,40,8,128,1,fp8,fp8,0,0.4745952129364014
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,40,1,128,1,float16,float16,0,0.27778239250183107
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,40,8,128,1,float16,float16,0,0.8002911567687988
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,40,40,128,1,fp8,fp8,0,0.31676640510559084
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,40,40,128,1,float16,fp8,0,0.3159168004989624
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,40,1,128,1,float16,fp8,0,0.2535232067108154
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,40,1,128,1,fp8,fp8,0,0.24664480686187745
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,40,40,128,1,float16,float16,0,1.0368335723876954
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,40,2,128,1,float16,float16,0,0.2954432010650635
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,40,2,128,1,float16,fp8,0,0.24760160446166993
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,40,2,128,1,fp8,fp8,0,0.24758400917053222
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,40,4,128,1,float16,float16,0,0.3349519968032837
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,40,4,128,1,float16,fp8,0,0.24637439250946044
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,40,4,128,1,fp8,fp8,0,0.24811999797821044
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,40,40,128,1,float16,float16,0,0.5333424091339112
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,40,8,128,1,float16,float16,0,0.4137087821960449
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,40,8,128,1,float16,fp8,0,0.24762721061706544
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,40,8,128,1,fp8,fp8,0,0.24712319374084474
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,40,40,128,1,float16,fp8,0,0.16884640455245972
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,40,40,128,1,fp8,fp8,0,0.16885119676589966
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,40,1,128,1,float16,float16,0,0.15678880214691163
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,40,1,128,1,float16,fp8,0,0.13309439420700073
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,40,1,128,1,fp8,fp8,0,0.13394080400466918
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,40,2,128,1,float16,float16,0,0.16499520540237428
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,40,2,128,1,float16,fp8,0,0.1335487961769104
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,40,2,128,1,fp8,fp8,0,0.13356159925460814
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,40,4,128,1,float16,float16,0,0.1857200026512146
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,40,4,128,1,float16,fp8,0,0.13369920253753662
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,40,4,128,1,fp8,fp8,0,0.13336639404296874
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,40,8,128,1,float16,float16,0,0.22495360374450685
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,40,8,128,1,float16,fp8,0,0.13427679538726806
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,40,8,128,1,fp8,fp8,0,0.13358880281448365
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,40,40,128,1,float16,fp8,0,0.09396160244941712
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,40,40,128,1,float16,float16,0,0.28205599784851076
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,40,40,128,1,fp8,fp8,0,0.09475039839744567
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,40,1,128,1,float16,float16,0,0.08816319704055786
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,40,1,128,1,float16,fp8,0,0.07507839798927307
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,40,1,128,1,fp8,fp8,0,0.0746832013130188
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,40,2,128,1,float16,float16,0,0.09783520102500916
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,40,2,128,1,float16,fp8,0,0.07514560222625732
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,40,2,128,1,fp8,fp8,0,0.0748192012310028
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,40,4,128,1,float16,float16,0,0.10528800487518311
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,40,4,128,1,float16,fp8,0,0.07499679923057556
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,40,4,128,1,fp8,fp8,0,0.0750656008720398
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,40,8,128,1,float16,float16,0,0.1282256007194519
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,40,8,128,1,float16,fp8,0,0.07528480291366577
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,40,8,128,1,fp8,fp8,0,0.07624959945678711
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,40,40,128,1,float16,float16,0,0.1553712010383606
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,40,40,128,1,float16,fp8,0,0.05347679853439331
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,40,40,128,1,fp8,fp8,0,0.053483200073242185
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,40,1,128,1,float16,float16,0,0.057627201080322266
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,40,1,128,1,float16,fp8,0,0.045316800475120544
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,40,1,128,1,fp8,fp8,0,0.04541119933128357
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,40,2,128,1,float16,float16,0,0.05779839754104614
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,40,2,128,1,float16,fp8,0,0.04532800018787384
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,40,2,128,1,fp8,fp8,0,0.04519520103931427
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,40,4,128,1,float16,float16,0,0.0635968029499054
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,40,4,128,1,float16,fp8,0,0.04527199864387512
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,40,4,128,1,fp8,fp8,0,0.04532000124454498
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,40,8,128,1,float16,float16,0,0.06912479996681213
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,40,8,128,1,float16,fp8,0,0.04530400037765503
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,40,8,128,1,fp8,fp8,0,0.0452320009469986
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,40,40,128,1,float16,float16,0,0.07685120105743408
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,40,40,128,1,float16,fp8,0,0.037339198589324954
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,40,40,128,1,fp8,fp8,0,0.03720479905605316
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,40,1,128,1,float16,float16,0,0.04522559940814972
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,40,1,128,1,float16,fp8,0,0.033022400736808774
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,40,1,128,1,fp8,fp8,0,0.03314239978790283
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,40,2,128,1,float16,float16,0,0.04533120095729828
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,40,2,128,1,float16,fp8,0,0.03296799957752228
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,40,2,128,1,fp8,fp8,0,0.03311040103435516
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,40,4,128,1,float16,float16,0,0.045284798741340636
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,40,4,128,1,float16,fp8,0,0.03303520083427429
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,40,4,128,1,fp8,fp8,0,0.03309600055217743
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,40,8,128,1,float16,float16,0,0.05149919986724853
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,40,8,128,1,float16,fp8,0,0.03304480016231537
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,40,8,128,1,fp8,fp8,0,0.03295679986476898
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,40,1,128,1,float16,float16,0,1.5437680244445802
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,40,1,128,1,float16,fp8,0,1.4269439697265625
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,40,1,128,1,fp8,fp8,0,1.4284048080444336
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,40,2,128,1,float16,fp8,0,1.425934410095215
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,40,2,128,1,fp8,fp8,0,1.4260784149169923
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,40,2,128,1,float16,float16,0,1.7043024063110352
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,40,4,128,1,float16,float16,0,2.0312784194946287
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,40,4,128,1,float16,fp8,0,1.425977611541748
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,40,4,128,1,fp8,fp8,0,1.4244272232055664
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,40,8,128,1,float16,fp8,0,1.4236063957214355
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,40,40,128,1,float16,fp8,0,0.9933183670043946
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,40,8,128,1,fp8,fp8,0,1.4236191749572753
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,40,1,128,1,float16,float16,0,0.7853727817535401
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,40,1,128,1,float16,fp8,0,0.7211904048919677
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,40,40,128,1,fp8,fp8,0,0.9903840065002442
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,40,1,128,1,fp8,fp8,0,0.7213647842407227
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,40,8,128,1,float16,float16,0,2.6690288543701173
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,40,2,128,1,float16,fp8,0,0.7203360080718995
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,40,2,128,1,float16,float16,0,0.8647104263305664
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,40,2,128,1,fp8,fp8,0,0.719927978515625
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,40,4,128,1,float16,fp8,0,0.7198448181152344
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,40,4,128,1,float16,float16,0,1.0264575958251954
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,40,4,128,1,fp8,fp8,0,0.7204864025115967
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,40,8,128,1,float16,fp8,0,0.7195040225982666
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,40,8,128,1,fp8,fp8,0,0.7196352005004882
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,40,40,128,1,float16,fp8,0,0.5033775806427002
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,40,40,128,1,float16,float16,0,3.800136184692383
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,40,40,128,1,fp8,fp8,0,0.5026768207550049
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,40,1,128,1,float16,float16,0,0.40828638076782225
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,40,8,128,1,float16,float16,0,1.347655963897705
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,40,1,128,1,float16,fp8,0,0.36849920749664306
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,40,1,128,1,fp8,fp8,0,0.36849920749664306
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,40,2,128,1,float16,float16,0,0.4480751991271973
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,40,2,128,1,fp8,fp8,0,0.3674256086349487
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,40,2,128,1,float16,fp8,0,0.3673232078552246
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,40,4,128,1,float16,fp8,0,0.3678528070449829
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,40,4,128,1,float16,float16,0,0.5273680210113525
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,40,40,128,1,float16,float16,0,1.9138175964355468
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,40,4,128,1,fp8,fp8,0,0.3678272008895874
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,40,8,128,1,float16,fp8,0,0.36744959354400636
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,40,8,128,1,float16,float16,0,0.6863728046417237
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,40,8,128,1,fp8,fp8,0,0.3672175884246826
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,40,40,128,1,float16,fp8,0,0.2601664066314697
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,40,40,128,1,fp8,fp8,0,0.26054561138153076
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,40,1,128,1,float16,float16,0,0.21894240379333496
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,40,1,128,1,float16,fp8,0,0.19201120138168334
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,40,40,128,1,float16,float16,0,0.9723135948181152
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,40,1,128,1,fp8,fp8,0,0.1919119954109192
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,40,2,128,1,float16,float16,0,0.23989439010620117
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,40,2,128,1,float16,fp8,0,0.1912992000579834
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,40,2,128,1,fp8,fp8,0,0.19138239622116088
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,40,4,128,1,float16,float16,0,0.2781584024429321
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,40,4,128,1,float16,fp8,0,0.1917296051979065
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,40,4,128,1,fp8,fp8,0,0.1918287992477417
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,40,8,128,1,float16,float16,0,0.3559664011001587
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,40,8,128,1,float16,fp8,0,0.19193120002746583
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,40,8,128,1,fp8,fp8,0,0.1930415987968445
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,40,40,128,1,float16,fp8,0,0.1378208041191101
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,40,40,128,1,float16,float16,0,0.4999680042266846
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,40,2,128,1,float16,fp8,0,0.10392960309982299
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,40,40,128,1,fp8,fp8,0,0.13804960250854492
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,40,1,128,1,float16,float16,0,0.12663520574569703
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,40,1,128,1,float16,fp8,0,0.104038405418396
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,40,1,128,1,fp8,fp8,0,0.10436799526214599
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,40,2,128,1,float16,float16,0,0.13579039573669432
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,40,2,128,1,fp8,fp8,0,0.103547203540802
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,40,4,128,1,float16,fp8,0,0.10427360534667969
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,40,4,128,1,float16,float16,0,0.15486400127410888
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,40,4,128,1,fp8,fp8,0,0.10410399436950683
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,40,8,128,1,float16,float16,0,0.19303840398788452
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,40,8,128,1,float16,fp8,0,0.10402560234069824
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,40,8,128,1,fp8,fp8,0,0.10372320413589478
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,40,40,128,1,float16,float16,0,0.2644223928451538
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,40,40,128,1,float16,fp8,0,0.07727360129356384
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,40,40,128,1,fp8,fp8,0,0.07797279953956604
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,40,1,128,1,float16,float16,0,0.07200800180435181
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,40,1,128,1,float16,fp8,0,0.059592002630233766
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,40,1,128,1,fp8,fp8,0,0.0590224027633667
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,40,4,128,1,fp8,fp8,0,0.05899199843406677
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,40,2,128,1,float16,float16,0,0.0794431984424591
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,40,2,128,1,float16,fp8,0,0.05891680121421814
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,40,2,128,1,fp8,fp8,0,0.05974239706993103
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,40,4,128,1,float16,float16,0,0.08901439905166626
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,40,4,128,1,float16,fp8,0,0.05892800092697144
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,40,8,128,1,float16,float16,0,0.11093920469284058
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,40,8,128,1,float16,fp8,0,0.05897759795188904
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,40,8,128,1,fp8,fp8,0,0.05906720161437988
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,40,40,128,1,float16,float16,0,0.14512959718704224
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,40,40,128,1,float16,fp8,0,0.04519039988517761
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,40,40,128,1,fp8,fp8,0,0.04529280066490173
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,40,1,128,1,float16,float16,0,0.04845919907093048
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,40,1,128,1,float16,fp8,0,0.03664959967136383
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,40,1,128,1,fp8,fp8,0,0.03703519999980927
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,40,2,128,1,float16,float16,0,0.04910399913787842
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,40,2,128,1,float16,fp8,0,0.037118399143218996
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,40,2,128,1,fp8,fp8,0,0.03697920143604279
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,40,4,128,1,float16,float16,0,0.053544002771377566
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,40,4,128,1,float16,fp8,0,0.03699040114879608
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,40,4,128,1,fp8,fp8,0,0.03683359920978546
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,40,8,128,1,float16,float16,0,0.05990719795227051
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,40,8,128,1,float16,fp8,0,0.03711200058460236
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,40,8,128,1,fp8,fp8,0,0.037110400199890134
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,40,40,128,1,float16,float16,0,0.07058719992637634
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,40,40,128,1,float16,fp8,0,0.03097119927406311
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,40,40,128,1,fp8,fp8,0,0.030924800038337707
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,40,1,128,1,float16,float16,0,0.0390608012676239
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,40,1,128,1,float16,fp8,0,0.026849600672721862
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,40,1,128,1,fp8,fp8,0,0.02707360088825226
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,40,2,128,1,float16,float16,0,0.039155200123786926
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,40,2,128,1,float16,fp8,0,0.026841598749160766
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,40,8,128,1,float16,fp8,0,0.026892799139022826
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,40,4,128,1,float16,float16,0,0.03909600079059601
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,40,2,128,1,fp8,fp8,0,0.0267984002828598
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,40,4,128,1,float16,fp8,0,0.026774400472640993
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,40,4,128,1,fp8,fp8,0,0.026892799139022826
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,40,8,128,1,float16,float16,0,0.04331679940223694
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,40,1,128,1,float16,fp8,0,0.018863999843597413
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,40,8,128,1,fp8,fp8,0,0.026782399415969847
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,40,40,128,1,float16,float16,0,0.04526880085468292
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,40,40,128,1,float16,fp8,0,0.020768000185489653
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,40,4,128,1,float16,float16,0,0.02906079888343811
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,40,40,128,1,fp8,fp8,0,0.020710399746894835
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,40,4,128,1,float16,fp8,0,0.0186831995844841
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,40,1,128,1,fp8,fp8,0,0.018639999628067016
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,40,1,128,1,float16,float16,0,0.028990399837493897
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,40,2,128,1,float16,float16,0,0.02894560098648071
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,40,2,128,1,float16,fp8,0,0.018614399433135986
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,40,2,128,1,fp8,fp8,0,0.01868640035390854
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,40,4,128,1,fp8,fp8,0,0.01879200041294098
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,40,8,128,1,float16,float16,0,0.030880001187324525
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,40,8,128,1,float16,fp8,0,0.018716800212860107
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,40,8,128,1,fp8,fp8,0,0.018680000305175783
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,40,1,128,1,float16,fp8,0,0.6029503822326661
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,40,1,128,1,fp8,fp8,0,0.6032927989959717
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,40,1,128,1,float16,float16,0,0.6605728149414063
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,40,2,128,1,float16,fp8,0,0.6021312236785888
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,40,2,128,1,float16,float16,0,0.7380000114440918
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,40,2,128,1,fp8,fp8,0,0.6024384021759033
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,40,4,128,1,float16,fp8,0,0.6022511959075928
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,40,4,128,1,float16,float16,0,0.8981776237487793
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,40,4,128,1,fp8,fp8,0,0.602459192276001
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,40,8,128,1,float16,fp8,0,0.6011168003082276
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,40,8,128,1,fp8,fp8,0,0.6012415885925293
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,40,40,128,1,float16,fp8,0,0.44336800575256347
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,40,8,128,1,float16,float16,0,1.2171792030334472
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,40,1,128,1,float16,float16,0,0.34304161071777345
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,40,40,128,1,fp8,fp8,0,0.4440464019775391
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,40,1,128,1,float16,fp8,0,0.30735681056976316
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,40,1,128,1,fp8,fp8,0,0.3065968036651611
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,40,2,128,1,float16,float16,0,0.382259202003479
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,40,2,128,1,float16,fp8,0,0.30674400329589846
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,40,2,128,1,fp8,fp8,0,0.3068320035934448
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,40,4,128,1,float16,fp8,0,0.3069727897644043
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,40,4,128,1,float16,float16,0,0.46072797775268554
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,40,40,128,1,float16,float16,0,1.8439264297485352
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,40,4,128,1,fp8,fp8,0,0.30658559799194335
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,40,8,128,1,float16,fp8,0,0.3069983959197998
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,40,8,128,1,float16,float16,0,0.6183135986328125
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,40,1,128,1,float16,fp8,0,0.16158080101013184
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,40,8,128,1,fp8,fp8,0,0.3073472023010254
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,40,40,128,1,float16,fp8,0,0.22993919849395753
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,40,2,128,1,float16,fp8,0,0.1621359944343567
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,40,40,128,1,fp8,fp8,0,0.22914559841156007
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,40,1,128,1,float16,float16,0,0.18761119842529297
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,40,40,128,1,float16,float16,0,0.9376832008361816
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,40,4,128,1,fp8,fp8,0,0.16103839874267578
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,40,1,128,1,fp8,fp8,0,0.16151360273361207
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,40,2,128,1,float16,float16,0,0.2077104091644287
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,40,2,128,1,fp8,fp8,0,0.16208319664001464
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,40,4,128,1,float16,float16,0,0.24541120529174804
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,40,40,128,1,float16,fp8,0,0.1205456018447876
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,40,4,128,1,float16,fp8,0,0.16191359758377075
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,40,8,128,1,float16,float16,0,0.3224783897399902
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,40,8,128,1,float16,fp8,0,0.16201599836349487
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,40,8,128,1,fp8,fp8,0,0.1622159957885742
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,40,40,128,1,float16,float16,0,0.48079838752746584
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,40,40,128,1,fp8,fp8,0,0.12086880207061768
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,40,1,128,1,float16,float16,0,0.10726079940795899
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,40,4,128,1,float16,float16,0,0.13553119897842408
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,40,1,128,1,float16,fp8,0,0.08618080019950866
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,40,1,128,1,fp8,fp8,0,0.08631200194358826
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,40,2,128,1,float16,float16,0,0.11550559997558593
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,40,2,128,1,float16,fp8,0,0.08581759929656982
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,40,2,128,1,fp8,fp8,0,0.08625119924545288
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,40,4,128,1,float16,fp8,0,0.08623679876327514
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,40,4,128,1,fp8,fp8,0,0.08629599809646607
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,40,8,128,1,float16,float16,0,0.17436959743499755
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,40,8,128,1,float16,fp8,0,0.08641759753227234
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,40,8,128,1,fp8,fp8,0,0.08641120195388793
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,40,40,128,1,float16,fp8,0,0.06787199974060058
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,40,40,128,1,float16,float16,0,0.25451200008392333
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,40,40,128,1,fp8,fp8,0,0.06814720034599304
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,40,1,128,1,float16,float16,0,0.062067198753356936
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,40,1,128,1,float16,fp8,0,0.04948799908161163
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,40,1,128,1,fp8,fp8,0,0.04951040148735046
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,40,2,128,1,float16,float16,0,0.07052800059318542
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,40,2,128,1,float16,fp8,0,0.049430400133132935
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,40,2,128,1,fp8,fp8,0,0.049435201287269595
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,40,4,128,1,float16,float16,0,0.07814080119132996
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,40,4,128,1,float16,fp8,0,0.04949440062046051
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,40,4,128,1,fp8,fp8,0,0.04958080053329468
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,40,8,128,1,float16,float16,0,0.10112799406051635
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,40,8,128,1,float16,fp8,0,0.0503711998462677
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,40,8,128,1,fp8,fp8,0,0.04943200051784515
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,40,40,128,1,float16,float16,0,0.14041279554367064
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,40,40,128,1,float16,fp8,0,0.041201600432395936
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,40,40,128,1,fp8,fp8,0,0.04129120111465454
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,40,1,128,1,float16,float16,0,0.043398401141166686
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,40,1,128,1,float16,fp8,0,0.032862401008605956
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,40,1,128,1,fp8,fp8,0,0.0326447993516922
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,40,2,128,1,float16,float16,0,0.043412798643112184
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,40,2,128,1,float16,fp8,0,0.03296479880809784
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,40,2,128,1,fp8,fp8,0,0.03300159871578216
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,40,4,128,1,float16,float16,0,0.04813120067119599
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,40,4,128,1,float16,fp8,0,0.03221760094165802
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,40,4,128,1,fp8,fp8,0,0.03295519948005676
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,40,8,128,1,float16,float16,0,0.05407840013504028
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,40,8,128,1,float16,fp8,0,0.03290719985961914
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,40,8,128,1,fp8,fp8,0,0.03292160034179688
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,40,40,128,1,float16,float16,0,0.06786080002784729
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,40,40,128,1,float16,fp8,0,0.026969599723815917
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,40,40,128,1,fp8,fp8,0,0.026931199431419372
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,40,1,128,1,float16,float16,0,0.034980800747871396
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,40,4,128,1,float16,fp8,0,0.02282720059156418
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,40,1,128,1,float16,fp8,0,0.022867199778556824
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,40,1,128,1,fp8,fp8,0,0.02290080040693283
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,40,2,128,1,float16,float16,0,0.0350816011428833
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,40,2,128,1,float16,fp8,0,0.022968000173568724
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,40,2,128,1,fp8,fp8,0,0.022963200509548188
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,40,4,128,1,float16,float16,0,0.03511520028114319
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,40,4,128,1,fp8,fp8,0,0.02279040068387985
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,40,8,128,1,float16,float16,0,0.039735999703407285
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,40,8,128,1,float16,fp8,0,0.022918400168418885
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,40,8,128,1,fp8,fp8,0,0.023371200263500213
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,40,40,128,1,float16,float16,0,0.04121440052986145
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,40,40,128,1,float16,fp8,0,0.01860000044107437
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,40,40,128,1,fp8,fp8,0,0.018585599958896637
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,40,1,128,1,float16,float16,0,0.026841598749160766
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,40,1,128,1,float16,fp8,0,0.01467519998550415
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,40,1,128,1,fp8,fp8,0,0.014711999893188476
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,40,2,128,1,float16,float16,0,0.026804798841476442
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,40,2,128,1,float16,fp8,0,0.016678400337696075
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,40,2,128,1,fp8,fp8,0,0.014667199552059173
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,40,4,128,1,float16,float16,0,0.02688480019569397
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,40,4,128,1,float16,fp8,0,0.016523200273513793
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,40,4,128,1,fp8,fp8,0,0.016731199622154237
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,40,8,128,1,float16,float16,0,0.026795199513435362
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,40,8,128,1,float16,fp8,0,0.01701440066099167
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,40,8,128,1,fp8,fp8,0,0.015132799744606018
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,40,40,128,1,float16,float16,0,0.03096800148487091
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,40,40,128,1,float16,fp8,0,0.01650079935789108
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,40,40,128,1,fp8,fp8,0,0.016648000478744505
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,40,1,128,1,float16,float16,0,0.024777600169181825
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,40,1,128,1,float16,fp8,0,0.014692799746990204
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,40,2,128,1,float16,float16,0,0.024879999458789825
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,40,2,128,1,float16,fp8,0,0.014643199741840363
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,40,2,128,1,fp8,fp8,0,0.014688000082969666
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,40,1,128,1,fp8,fp8,0,0.014715200662612915
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,40,4,128,1,float16,float16,0,0.024743999540805816
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,40,4,128,1,float16,fp8,0,0.014920000731945039
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,40,4,128,1,fp8,fp8,0,0.014689600467681885
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,40,8,128,1,float16,float16,0,0.024827200174331664
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,40,8,128,1,float16,fp8,0,0.014723199605941772
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,40,8,128,1,fp8,fp8,0,0.014694400131702423
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,40,1,128,1,float16,float16,0,0.40048961639404296
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,40,1,128,1,float16,fp8,0,0.3598752021789551
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,40,1,128,1,fp8,fp8,0,0.35951359272003175
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,40,2,128,1,float16,float16,0,0.43854560852050783
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,40,2,128,1,float16,fp8,0,0.3589344024658203
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,40,2,128,1,fp8,fp8,0,0.35893759727478025
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,40,4,128,1,float16,fp8,0,0.3586463928222656
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,40,4,128,1,float16,float16,0,0.5168496131896972
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,40,4,128,1,fp8,fp8,0,0.3588255882263184
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,40,8,128,1,float16,fp8,0,0.3587647914886475
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,40,40,128,1,fp8,fp8,0,0.2525167942047119
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,40,8,128,1,fp8,fp8,0,0.35852320194244386
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,40,40,128,1,float16,float16,0,0.959819221496582
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,40,2,128,1,float16,float16,0,0.23273279666900634
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,40,8,128,1,float16,float16,0,0.6738927841186524
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,40,40,128,1,float16,fp8,0,0.2523024082183838
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,40,1,128,1,float16,float16,0,0.2132352113723755
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,40,1,128,1,float16,fp8,0,0.18520959615707397
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,40,1,128,1,fp8,fp8,0,0.18480639457702636
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,40,2,128,1,float16,fp8,0,0.18474080562591552
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,40,2,128,1,fp8,fp8,0,0.18502880334854127
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,40,4,128,1,float16,float16,0,0.2706991910934448
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,40,4,128,1,float16,fp8,0,0.18495839834213257
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,40,4,128,1,fp8,fp8,0,0.18479360342025758
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,40,8,128,1,float16,fp8,0,0.18502559661865234
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,40,8,128,1,float16,float16,0,0.34750239849090575
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,40,8,128,1,fp8,fp8,0,0.1848207950592041
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,40,40,128,1,float16,fp8,0,0.133241605758667
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,40,40,128,1,float16,float16,0,0.4925055980682373
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,40,40,128,1,fp8,fp8,0,0.13331520557403564
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,40,1,128,1,float16,float16,0,0.12173279523849487
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,40,1,128,1,float16,fp8,0,0.0985696017742157
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,40,1,128,1,fp8,fp8,0,0.09858239889144897
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,40,2,128,1,float16,float16,0,0.12901599407196046
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,40,2,128,1,float16,fp8,0,0.09894880056381225
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,40,2,128,1,fp8,fp8,0,0.0987824022769928
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,40,8,128,1,float16,fp8,0,0.09855520129203796
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,40,4,128,1,float16,float16,0,0.1488800048828125
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,40,4,128,1,float16,fp8,0,0.09898560047149658
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,40,4,128,1,fp8,fp8,0,0.09848639965057374
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,40,8,128,1,float16,float16,0,0.18677279949188233
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,40,8,128,1,fp8,fp8,0,0.09920480251312255
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,40,40,128,1,float16,float16,0,0.2578943967819214
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,40,40,128,1,float16,fp8,0,0.07175840139389038
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,40,40,128,1,fp8,fp8,0,0.07191200256347656
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,40,1,128,1,float16,float16,0,0.06788480281829834
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,40,1,128,1,float16,fp8,0,0.053179198503494264
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,40,1,128,1,fp8,fp8,0,0.0529263973236084
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,40,2,128,1,float16,float16,0,0.07612320184707641
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,40,2,128,1,float16,fp8,0,0.053363198041915895
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,40,2,128,1,fp8,fp8,0,0.05320799946784973
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,40,4,128,1,float16,float16,0,0.08585919737815857
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,40,4,128,1,float16,fp8,0,0.05351520180702209
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,40,4,128,1,fp8,fp8,0,0.053502398729324344
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,40,8,128,1,float16,float16,0,0.10466079711914063
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,40,8,128,1,float16,fp8,0,0.053513598442077634
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,40,8,128,1,fp8,fp8,0,0.053483200073242185
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,40,40,128,1,float16,float16,0,0.13967200517654418
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,40,40,128,1,float16,fp8,0,0.041140800714492796
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,40,40,128,1,fp8,fp8,0,0.04115679860115051
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,40,1,128,1,float16,float16,0,0.04328800141811371
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,40,1,128,1,float16,fp8,0,0.031064000725746155
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,40,1,128,1,fp8,fp8,0,0.031148800253868104
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,40,2,128,1,float16,float16,0,0.04336319863796234
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,40,2,128,1,float16,fp8,0,0.032023999094963077
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,40,2,128,1,fp8,fp8,0,0.03224799931049347
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,40,4,128,1,float16,float16,0,0.048110398650169375
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,40,4,128,1,float16,fp8,0,0.03131360113620758
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,40,4,128,1,fp8,fp8,0,0.03193280100822449
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,40,1,128,1,float16,float16,0,0.03293919861316681
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,40,8,128,1,float16,float16,0,0.05413600206375122
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,40,1,128,1,fp8,fp8,0,0.02138399928808212
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,40,8,128,1,float16,fp8,0,0.03118720054626465
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,40,2,128,1,float16,fp8,0,0.02110559940338135
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,40,2,128,1,fp8,fp8,0,0.020857599377632142
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,40,8,128,1,fp8,fp8,0,0.0320576012134552
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,40,4,128,1,float16,fp8,0,0.020732800662517547
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,40,40,128,1,float16,float16,0,0.06606400012969971
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,40,40,128,1,float16,fp8,0,0.025167998671531678
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,40,40,128,1,fp8,fp8,0,0.02496480047702789
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,40,1,128,1,float16,fp8,0,0.02117439955472946
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,40,2,128,1,float16,float16,0,0.03311040103435516
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,40,4,128,1,float16,float16,0,0.033523198962211606
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,40,4,128,1,fp8,fp8,0,0.020716799795627593
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,40,8,128,1,float16,float16,0,0.03815839886665344
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,40,8,128,1,float16,fp8,0,0.021065600216388702
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,40,8,128,1,fp8,fp8,0,0.020670400559902193
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,40,40,128,1,float16,float16,0,0.04160960018634796
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,40,2,128,1,float16,fp8,0,0.016579200327396394
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,40,40,128,1,float16,fp8,0,0.0186256006360054
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,40,40,128,1,fp8,fp8,0,0.01863040030002594
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,40,1,128,1,float16,float16,0,0.02733280062675476
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,40,1,128,1,float16,fp8,0,0.01653439998626709
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,40,1,128,1,fp8,fp8,0,0.01663679927587509
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,40,2,128,1,float16,float16,0,0.026940798759460448
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,40,2,128,1,fp8,fp8,0,0.01675360053777695
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,40,4,128,1,float16,float16,0,0.026814401149749756
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,40,4,128,1,float16,fp8,0,0.01669919937849045
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,40,4,128,1,fp8,fp8,0,0.016683200001716615
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,40,8,128,1,float16,float16,0,0.027961599826812743
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,40,8,128,1,float16,fp8,0,0.0165120005607605
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,40,8,128,1,fp8,fp8,0,0.01655679941177368
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,40,40,128,1,float16,float16,0,0.02682720124721527
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,40,40,128,1,float16,fp8,0,0.012547199428081513
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,40,40,128,1,fp8,fp8,0,0.012457600235939026
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,40,1,128,1,float16,float16,0,0.021547199785709382
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,40,1,128,1,float16,fp8,0,0.01239520013332367
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,40,1,128,1,fp8,fp8,0,0.012479999661445617
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,40,2,128,1,float16,float16,0,0.02205120027065277
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,40,8,128,1,float16,fp8,0,0.012489599734544754
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,40,2,128,1,float16,fp8,0,0.011481600254774094
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,40,2,128,1,fp8,fp8,0,0.010887999832630158
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,40,4,128,1,float16,float16,0,0.02202879935503006
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,40,4,128,1,float16,fp8,0,0.010995200276374817
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,40,4,128,1,fp8,fp8,0,0.012455999851226807
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,40,8,128,1,float16,float16,0,0.020734399557113647
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,40,8,128,1,fp8,fp8,0,0.012483199685811996
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,40,40,128,1,float16,float16,0,0.02075359970331192
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,40,40,128,1,float16,fp8,0,0.011631999909877778
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,40,2,128,1,fp8,fp8,0,0.01058880016207695
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,40,40,128,1,fp8,fp8,0,0.011932799965143204
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,40,1,128,1,float16,float16,0,0.02083040028810501
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,40,1,128,1,float16,fp8,0,0.010540799796581268
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,40,4,128,1,fp8,fp8,0,0.010545600205659866
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,40,1,128,1,fp8,fp8,0,0.010487999767065048
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,40,2,128,1,float16,float16,0,0.020707200467586517
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,40,2,128,1,float16,fp8,0,0.010500799864530563
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,40,4,128,1,float16,float16,0,0.02066880017518997
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,40,4,128,1,float16,fp8,0,0.01050880029797554
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,40,8,128,1,float16,float16,0,0.02067359983921051
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,40,8,128,1,float16,fp8,0,0.01058880016207695
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,40,8,128,1,fp8,fp8,0,0.01069599986076355
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,40,1,128,1,float16,float16,0,0.3060352087020874
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,40,2,128,1,float16,fp8,0,0.27092320919036866
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,40,2,128,1,fp8,fp8,0,0.2701711893081665
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,40,1,128,1,float16,fp8,0,0.27051520347595215
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,40,1,128,1,fp8,fp8,0,0.27078559398651125
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,40,2,128,1,float16,float16,0,0.32618720531463624
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,40,4,128,1,float16,fp8,0,0.2701904058456421
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,40,4,128,1,float16,float16,0,0.36475679874420164
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,40,4,128,1,fp8,fp8,0,0.2707263946533203
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,40,8,128,1,float16,float16,0,0.44170079231262205
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,40,40,128,1,float16,float16,0,0.5397632122039795
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,40,8,128,1,float16,fp8,0,0.26989600658416746
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,40,8,128,1,fp8,fp8,0,0.27012319564819337
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,40,40,128,1,float16,fp8,0,0.17422720193862914
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,40,2,128,1,float16,float16,0,0.17511680126190185
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,40,40,128,1,fp8,fp8,0,0.17461440563201905
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,40,1,128,1,float16,float16,0,0.16918400526046753
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,40,1,128,1,float16,fp8,0,0.14044320583343506
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,40,1,128,1,fp8,fp8,0,0.14047520160675048
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,40,2,128,1,float16,fp8,0,0.14041600227355958
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,40,2,128,1,fp8,fp8,0,0.14024000167846679
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,40,4,128,1,float16,float16,0,0.19549599885940552
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,40,8,128,1,fp8,fp8,0,0.14041440486907958
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,40,4,128,1,float16,fp8,0,0.14030400514602662
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,40,4,128,1,fp8,fp8,0,0.14121439456939697
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,40,8,128,1,float16,float16,0,0.2334752082824707
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,40,8,128,1,float16,fp8,0,0.13991039991378784
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,40,40,128,1,float16,fp8,0,0.09291520118713378
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,40,40,128,1,float16,float16,0,0.2812432050704956
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,40,40,128,1,fp8,fp8,0,0.09366400241851806
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,40,1,128,1,float16,float16,0,0.09474080204963684
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,40,1,128,1,float16,fp8,0,0.07492960095405579
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,40,1,128,1,fp8,fp8,0,0.07478079795837403
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,40,2,128,1,float16,float16,0,0.09924319982528687
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,40,2,128,1,float16,fp8,0,0.07500640153884888
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,40,2,128,1,fp8,fp8,0,0.07474719882011413
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,40,8,128,1,fp8,fp8,0,0.07486720085144043
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,40,4,128,1,float16,float16,0,0.10980000495910644
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,40,4,128,1,float16,fp8,0,0.07523040175437927
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,40,4,128,1,fp8,fp8,0,0.07487679719924926
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,40,8,128,1,float16,float16,0,0.12946879863739014
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,40,8,128,1,float16,fp8,0,0.07492960095405579
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,40,40,128,1,float16,float16,0,0.151254403591156
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,40,40,128,1,float16,fp8,0,0.04944800138473511
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,40,40,128,1,fp8,fp8,0,0.05003520250320435
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,40,1,128,1,float16,float16,0,0.05514079928398132
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,40,1,128,1,float16,fp8,0,0.04133920073509216
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,40,1,128,1,fp8,fp8,0,0.041193601489067075
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,40,2,128,1,float16,float16,0,0.055662399530410765
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,40,2,128,1,float16,fp8,0,0.04126560091972351
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,40,2,128,1,fp8,fp8,0,0.041203200817108154
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,40,4,128,1,float16,float16,0,0.05977439880371094
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,40,4,128,1,float16,fp8,0,0.04123519957065582
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,40,4,128,1,fp8,fp8,0,0.041264000535011294
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,40,8,128,1,float16,float16,0,0.06594399809837341
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,40,8,128,1,float16,fp8,0,0.04124639928340912
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,40,8,128,1,fp8,fp8,0,0.041335999965667725
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,40,40,128,1,float16,float16,0,0.07120479941368103
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,40,40,128,1,float16,fp8,0,0.029380801320075988
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,40,40,128,1,fp8,fp8,0,0.02927359938621521
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,40,1,128,1,float16,float16,0,0.03744960129261017
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,40,1,128,1,float16,fp8,0,0.025729599595069885
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,40,1,128,1,fp8,fp8,0,0.024926400184631346
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,40,2,128,1,float16,float16,0,0.0380623996257782
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,40,2,128,1,float16,fp8,0,0.025489598512649536
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,40,2,128,1,fp8,fp8,0,0.025537601113319396
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,40,4,128,1,float16,float16,0,0.0383679986000061
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,40,4,128,1,float16,fp8,0,0.0263808012008667
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,40,4,128,1,fp8,fp8,0,0.025140801072120668
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,40,8,128,1,float16,float16,0,0.043326398730278014
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,40,8,128,1,float16,fp8,0,0.025279998779296875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,40,8,128,1,fp8,fp8,0,0.026345598697662353
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,40,40,128,1,float16,float16,0,0.04432159960269928
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,40,2,128,1,float16,fp8,0,0.01860480010509491
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,40,40,128,1,float16,fp8,0,0.020372800529003143
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,40,40,128,1,fp8,fp8,0,0.020662400126457214
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,40,1,128,1,float16,float16,0,0.030505600571632385
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,40,1,128,1,float16,fp8,0,0.01751520037651062
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,40,1,128,1,fp8,fp8,0,0.0173552006483078
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,40,2,128,1,float16,float16,0,0.02886880040168762
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,40,2,128,1,fp8,fp8,0,0.018115200102329254
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,40,4,128,1,float16,float16,0,0.029505598545074462
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,40,4,128,1,float16,fp8,0,0.018595199286937713
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,40,4,128,1,fp8,fp8,0,0.018559999763965607
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,40,8,128,1,float16,float16,0,0.030881598591804504
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,40,8,128,1,float16,fp8,0,0.017956799268722533
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,40,8,128,1,fp8,fp8,0,0.017953599989414214
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,40,40,128,1,float16,float16,0,0.031035199761390686
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,40,40,128,1,float16,fp8,0,0.01462399959564209
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,40,40,128,1,fp8,fp8,0,0.014569599926471711
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,40,1,128,1,float16,float16,0,0.024911999702453613
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,40,1,128,1,float16,fp8,0,0.014443199336528777
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,40,1,128,1,fp8,fp8,0,0.01451839953660965
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,40,4,128,1,fp8,fp8,0,0.014481599628925323
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,40,2,128,1,float16,float16,0,0.02369920015335083
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,40,2,128,1,float16,fp8,0,0.014327999949455262
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,40,2,128,1,fp8,fp8,0,0.014529600739479065
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,40,4,128,1,float16,float16,0,0.024684800207614897
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,40,4,128,1,float16,fp8,0,0.014375999569892883
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,40,8,128,1,float16,float16,0,0.024984000623226164
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,40,8,128,1,float16,fp8,0,0.01446560025215149
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,40,8,128,1,fp8,fp8,0,0.0144896000623703
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,40,40,128,1,float16,float16,0,0.02074880003929138
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,40,40,128,1,float16,fp8,0,0.01072319969534874
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,40,40,128,1,fp8,fp8,0,0.010608000308275222
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,40,1,128,1,float16,float16,0,0.020715199410915375
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,40,1,128,1,float16,fp8,0,0.010407999902963639
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,40,1,128,1,fp8,fp8,0,0.01061279997229576
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,40,2,128,1,float16,float16,0,0.018804800510406495
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,40,2,128,1,float16,fp8,0,0.010582400113344192
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,40,2,128,1,fp8,fp8,0,0.01043360009789467
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,40,4,128,1,float16,float16,0,0.020657600462436677
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,40,4,128,1,float16,fp8,0,0.010542400181293488
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,40,4,128,1,fp8,fp8,0,0.010473600029945374
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,40,40,128,1,float16,fp8,0,0.010398399829864503
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,40,8,128,1,float16,float16,0,0.02078399956226349
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,40,8,128,1,float16,fp8,0,0.010443200170993806
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,40,8,128,1,fp8,fp8,0,0.010601600259542465
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,40,40,128,1,float16,float16,0,0.01867839992046356
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,40,40,128,1,fp8,fp8,0,0.010580799728631973
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,40,1,128,1,float16,float16,0,0.018779200315475465
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,40,1,128,1,float16,fp8,0,0.010367999970912933
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,40,1,128,1,fp8,fp8,0,0.010507199913263321
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,40,2,128,1,float16,float16,0,0.018614399433135986
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,40,2,128,1,float16,fp8,0,0.01050880029797554
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,40,2,128,1,fp8,fp8,0,0.01043040007352829
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,40,4,128,1,float16,float16,0,0.018681600689888
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,40,4,128,1,float16,fp8,0,0.01050880029797554
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,40,4,128,1,fp8,fp8,0,0.010420800000429154
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,40,8,128,1,float16,float16,0,0.019113600254058838
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,40,8,128,1,float16,fp8,0,0.010569600015878677
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,40,8,128,1,fp8,fp8,0,0.010395199805498124
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,40,1,128,1,float16,float16,0,0.2731791973114014
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,40,1,128,1,float16,fp8,0,0.23476641178131102
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,40,1,128,1,fp8,fp8,0,0.23404319286346437
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,40,2,128,1,float16,float16,0,0.27707040309906006
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,40,2,128,1,float16,fp8,0,0.23428480625152587
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,40,2,128,1,fp8,fp8,0,0.23394079208374025
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,40,4,128,1,float16,float16,0,0.29744319915771483
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,40,4,128,1,float16,fp8,0,0.2335200071334839
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,40,4,128,1,fp8,fp8,0,0.2340320110321045
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,40,8,128,1,float16,float16,0,0.3400975942611694
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,40,8,128,1,float16,fp8,0,0.23405919075012208
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,40,8,128,1,fp8,fp8,0,0.2344815969467163
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,40,40,128,1,float16,float16,0,0.3341775894165039
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,40,40,128,1,float16,fp8,0,0.138264000415802
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,40,40,128,1,fp8,fp8,0,0.13805919885635376
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,40,1,128,1,float16,float16,0,0.14672640562057496
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,40,1,128,1,float16,fp8,0,0.12085440158843994
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,40,1,128,1,fp8,fp8,0,0.12138400077819825
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,40,2,128,1,float16,float16,0,0.15185439586639404
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,40,2,128,1,float16,fp8,0,0.12042880058288574
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,40,2,128,1,fp8,fp8,0,0.1203536033630371
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,40,4,128,1,float16,float16,0,0.16088320016860963
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,40,4,128,1,float16,fp8,0,0.11992479562759399
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,40,4,128,1,fp8,fp8,0,0.12104320526123047
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,40,8,128,1,float16,float16,0,0.18199360370635986
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,40,8,128,1,float16,fp8,0,0.12166880369186402
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,40,8,128,1,fp8,fp8,0,0.11986240148544311
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,40,40,128,1,float16,float16,0,0.17832000255584718
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,40,40,128,1,float16,fp8,0,0.0731823980808258
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,40,40,128,1,fp8,fp8,0,0.07290400266647339
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,40,1,128,1,float16,float16,0,0.08086720108985901
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,40,1,128,1,float16,fp8,0,0.06496800184249878
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,40,1,128,1,fp8,fp8,0,0.06443200111389161
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,40,2,128,1,float16,float16,0,0.08249120116233825
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,40,8,128,1,float16,float16,0,0.09462400078773499
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,40,2,128,1,float16,fp8,0,0.06428160071372986
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,40,2,128,1,fp8,fp8,0,0.06434080004692078
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,40,4,128,1,float16,float16,0,0.08653920292854309
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,40,4,128,1,float16,fp8,0,0.06415039896965027
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,40,4,128,1,fp8,fp8,0,0.06445760130882264
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,40,8,128,1,float16,fp8,0,0.06474239826202392
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,40,8,128,1,fp8,fp8,0,0.06492000222206115
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,40,40,128,1,float16,float16,0,0.08665279746055603
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,40,40,128,1,float16,fp8,0,0.041089600324630736
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,40,40,128,1,fp8,fp8,0,0.041131201386451724
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,40,1,128,1,float16,float16,0,0.0514959990978241
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,40,1,128,1,float16,fp8,0,0.03716000020503998
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,40,1,128,1,fp8,fp8,0,0.03686560094356537
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,40,2,128,1,float16,float16,0,0.05166879892349243
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,40,2,128,1,float16,fp8,0,0.03712320029735565
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,40,2,128,1,fp8,fp8,0,0.03707840144634247
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,40,4,128,1,float16,float16,0,0.05141599774360657
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,40,4,128,1,float16,fp8,0,0.0369024008512497
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,40,4,128,1,fp8,fp8,0,0.036908799409866334
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,40,8,128,1,float16,float16,0,0.056036800146102905
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,40,8,128,1,float16,fp8,0,0.03707840144634247
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,40,8,128,1,fp8,fp8,0,0.03707840144634247
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,40,40,128,1,float16,float16,0,0.051481598615646364
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,40,40,128,1,float16,fp8,0,0.02484000027179718
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,40,40,128,1,fp8,fp8,0,0.02510559856891632
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,40,1,128,1,float16,float16,0,0.03586879968643188
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,40,1,128,1,float16,fp8,0,0.022907200455665588
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,40,1,128,1,fp8,fp8,0,0.022870400547981264
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,40,2,128,1,float16,float16,0,0.035464000701904294
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,40,2,128,1,float16,fp8,0,0.02289759963750839
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,40,2,128,1,fp8,fp8,0,0.022873599827289582
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,40,4,128,1,float16,float16,0,0.03638879954814911
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,40,4,128,1,float16,fp8,0,0.02279680073261261
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,40,4,128,1,fp8,fp8,0,0.022833600640296936
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,40,40,128,1,fp8,fp8,0,0.016702400147914888
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,40,8,128,1,float16,float16,0,0.03691839873790741
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,40,8,128,1,float16,fp8,0,0.022767999768257143
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,40,8,128,1,fp8,fp8,0,0.02280319929122925
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,40,40,128,1,float16,float16,0,0.03332000076770782
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,40,40,128,1,float16,fp8,0,0.017220799624919892
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,40,1,128,1,float16,float16,0,0.026822400093078614
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,40,1,128,1,float16,fp8,0,0.01653279960155487
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,40,1,128,1,fp8,fp8,0,0.016684800386428833
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,40,2,128,1,float16,float16,0,0.026841598749160766
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,40,2,128,1,float16,fp8,0,0.016550399363040924
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,40,2,128,1,fp8,fp8,0,0.01653600037097931
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,40,4,128,1,float16,float16,0,0.026846399903297423
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,40,4,128,1,float16,fp8,0,0.01664319932460785
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,40,4,128,1,fp8,fp8,0,0.016553600132465363
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,40,8,128,1,float16,float16,0,0.027521601319313048
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,40,8,128,1,float16,fp8,0,0.016628800332546233
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,40,8,128,1,fp8,fp8,0,0.01655520051717758
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,40,40,128,1,float16,float16,0,0.023630400002002717
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,40,40,128,1,float16,fp8,0,0.012838399410247803
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,40,40,128,1,fp8,fp8,0,0.013148799538612366
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,40,1,128,1,float16,float16,0,0.02282720059156418
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,40,1,128,1,float16,fp8,0,0.012452799826860428
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,40,1,128,1,fp8,fp8,0,0.01263200044631958
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,40,2,128,1,float16,float16,0,0.02274080067873001
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,40,2,128,1,fp8,fp8,0,0.012484800070524216
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,40,4,128,1,float16,float16,0,0.022878399491310118
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,40,2,128,1,float16,fp8,0,0.012651200592517852
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,40,4,128,1,float16,fp8,0,0.012665599584579468
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,40,4,128,1,fp8,fp8,0,0.012577599287033081
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,40,8,128,1,float16,float16,0,0.022878399491310118
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,40,8,128,1,float16,fp8,0,0.01252480000257492
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,40,8,128,1,fp8,fp8,0,0.01271039992570877
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,40,40,128,1,float16,float16,0,0.0206496000289917
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,40,40,128,1,float16,fp8,0,0.010520000010728836
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,40,40,128,1,fp8,fp8,0,0.010395199805498124
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,40,1,128,1,float16,float16,0,0.0187376007437706
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,40,1,128,1,float16,fp8,0,0.010395199805498124
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,40,1,128,1,fp8,fp8,0,0.0104032002389431
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,40,2,128,1,float16,float16,0,0.01879200041294098
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,40,2,128,1,float16,fp8,0,0.010422399640083313
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,40,2,128,1,fp8,fp8,0,0.010467199981212616
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,40,4,128,1,float16,float16,0,0.018812799453735353
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,40,4,128,1,float16,fp8,0,0.01043199971318245
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,40,4,128,1,fp8,fp8,0,0.010382399708032609
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,40,8,128,1,float16,float16,0,0.019644799828529357
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,40,8,128,1,float16,fp8,0,0.010422399640083313
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,40,8,128,1,fp8,fp8,0,0.010531199723482132
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,40,40,128,1,float16,float16,0,0.018595199286937713
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,40,40,128,1,float16,fp8,0,0.010467199981212616
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,40,2,128,1,fp8,fp8,0,0.009027200192213059
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,40,40,128,1,fp8,fp8,0,0.010406400263309478
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,40,1,128,1,float16,float16,0,0.01732639968395233
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,40,1,128,1,float16,fp8,0,0.010119999945163726
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,40,1,128,1,fp8,fp8,0,0.010444799810647965
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,40,2,128,1,float16,float16,0,0.018713599443435668
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,40,2,128,1,float16,fp8,0,0.010470400005578995
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,40,4,128,1,float16,float16,0,0.01880960017442703
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,40,4,128,1,float16,fp8,0,0.008879999816417693
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,40,4,128,1,fp8,fp8,0,0.009775999933481216
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,40,8,128,1,float16,float16,0,0.018614399433135986
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,40,8,128,1,float16,fp8,0,0.010401599854230881
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,40,8,128,1,fp8,fp8,0,0.010395199805498124
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,40,1,128,1,float16,float16,0,0.26561439037323
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,40,1,128,1,float16,fp8,0,0.21242239475250244
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,40,1,128,1,fp8,fp8,0,0.21221599578857422
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,40,2,128,1,float16,float16,0,0.2774784088134766
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,40,2,128,1,float16,fp8,0,0.2124095916748047
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,40,2,128,1,fp8,fp8,0,0.21286399364471437
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,40,4,128,1,float16,float16,0,0.2820127964019775
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,40,4,128,1,float16,fp8,0,0.21354079246520996
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,40,4,128,1,fp8,fp8,0,0.21324799060821534
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,40,8,128,1,float16,float16,0,0.303767991065979
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,40,8,128,1,float16,fp8,0,0.21356480121612548
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,40,8,128,1,fp8,fp8,0,0.21521921157836915
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,40,40,128,1,float16,float16,0,0.23916161060333252
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,40,40,128,1,float16,fp8,0,0.1200111985206604
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,40,40,128,1,fp8,fp8,0,0.11965440511703491
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,40,1,128,1,float16,float16,0,0.1412160038948059
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,40,1,128,1,float16,fp8,0,0.11049920320510864
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,40,1,128,1,fp8,fp8,0,0.1103983998298645
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,40,2,128,1,float16,float16,0,0.14142559766769408
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,40,2,128,1,float16,fp8,0,0.11152160167694092
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,40,2,128,1,fp8,fp8,0,0.11057920455932617
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,40,4,128,1,float16,float16,0,0.1473952054977417
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,40,4,128,1,float16,fp8,0,0.11024160385131836
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,40,4,128,1,fp8,fp8,0,0.11015520095825196
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,40,8,128,1,float16,float16,0,0.15495840311050416
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,40,8,128,1,float16,fp8,0,0.11070400476455688
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,40,8,128,1,fp8,fp8,0,0.11041760444641113
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,40,40,128,1,float16,float16,0,0.11707359552383423
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,40,40,128,1,float16,fp8,0,0.06462399959564209
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,40,40,128,1,fp8,fp8,0,0.06474400162696839
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,40,1,128,1,float16,float16,0,0.08157119750976563
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,40,1,128,1,float16,fp8,0,0.060438400506973265
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,40,1,128,1,fp8,fp8,0,0.06063680052757263
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,40,2,128,1,float16,float16,0,0.08206080198287964
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,40,2,128,1,float16,fp8,0,0.061217600107192995
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,40,2,128,1,fp8,fp8,0,0.06079040169715881
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,40,4,128,1,float16,float16,0,0.08208960294723511
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,40,4,128,1,float16,fp8,0,0.06041439771652222
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,40,4,128,1,fp8,fp8,0,0.06060799956321716
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,40,8,128,1,float16,float16,0,0.08727200031280517
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,40,8,128,1,float16,fp8,0,0.06031200289726257
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,40,8,128,1,fp8,fp8,0,0.06139839887619018
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,40,40,128,1,float16,float16,0,0.06587839722633362
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,40,40,128,1,float16,fp8,0,0.037299200892448425
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,40,40,128,1,fp8,fp8,0,0.03712159991264343
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,40,1,128,1,float16,float16,0,0.05119360089302063
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,40,1,128,1,float16,fp8,0,0.03509120047092438
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,40,1,128,1,fp8,fp8,0,0.03504000008106232
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,40,2,128,1,float16,float16,0,0.05157279968261719
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,40,2,128,1,float16,fp8,0,0.03520799875259399
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,40,2,128,1,fp8,fp8,0,0.035175999999046324
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,40,4,128,1,float16,float16,0,0.05146399736404419
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,40,4,128,1,float16,fp8,0,0.03496319949626923
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,40,4,128,1,fp8,fp8,0,0.0353520005941391
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,40,8,128,1,float16,float16,0,0.05144000053405762
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,40,8,128,1,float16,fp8,0,0.0351936012506485
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,40,8,128,1,fp8,fp8,0,0.03504959940910339
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,40,40,128,1,float16,float16,0,0.041223999857902524
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,40,40,128,1,float16,fp8,0,0.02280319929122925
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,40,40,128,1,fp8,fp8,0,0.022811199724674224
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,40,1,128,1,float16,float16,0,0.034832000732421875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,40,1,128,1,float16,fp8,0,0.02258719950914383
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,40,1,128,1,fp8,fp8,0,0.022694399952888487
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,40,2,128,1,float16,float16,0,0.03452959954738617
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,40,2,128,1,float16,fp8,0,0.02269279956817627
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,40,2,128,1,fp8,fp8,0,0.0226160004734993
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,40,4,128,1,float16,float16,0,0.03386560082435608
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,40,4,128,1,float16,fp8,0,0.022617599368095397
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,40,4,128,1,fp8,fp8,0,0.02269120067358017
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,40,8,128,1,float16,float16,0,0.034971201419830324
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,40,8,128,1,float16,fp8,0,0.022711999714374542
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,40,1,128,1,fp8,fp8,0,0.01642879992723465
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,40,8,128,1,fp8,fp8,0,0.022758400440216063
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,40,40,128,1,float16,float16,0,0.02701759934425354
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,40,40,128,1,float16,fp8,0,0.016572800278663636
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,40,40,128,1,fp8,fp8,0,0.016663999855518342
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,40,1,128,1,float16,float16,0,0.02677600085735321
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,40,1,128,1,float16,fp8,0,0.01611679941415787
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,40,2,128,1,float16,float16,0,0.025348800420761108
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,40,2,128,1,float16,fp8,0,0.016531200706958772
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,40,2,128,1,fp8,fp8,0,0.015481600165367126
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,40,4,128,1,float16,float16,0,0.02543199956417084
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,40,40,128,1,float16,fp8,0,0.012636800110340119
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,40,4,128,1,float16,fp8,0,0.016648000478744505
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,40,4,128,1,fp8,fp8,0,0.016543999314308167
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,40,8,128,1,float16,float16,0,0.026761600375175477
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,40,8,128,1,float16,fp8,0,0.016463999450206757
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,40,8,128,1,fp8,fp8,0,0.016543999314308167
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,40,2,128,1,float16,fp8,0,0.012611199915409089
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,40,40,128,1,float16,float16,0,0.022785599529743194
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,40,4,128,1,float16,float16,0,0.022686399519443512
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,40,1,128,1,float16,float16,0,0.021729600429534913
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,40,4,128,1,float16,fp8,0,0.01266079992055893
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,40,40,128,1,fp8,fp8,0,0.012510399520397186
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,40,1,128,1,float16,fp8,0,0.012588800489902496
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,40,1,128,1,fp8,fp8,0,0.012563200294971466
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,40,40,128,1,float16,float16,0,0.019755199551582336
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,40,2,128,1,float16,float16,0,0.021862399578094483
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,40,2,128,1,fp8,fp8,0,0.012476799637079239
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,40,4,128,1,fp8,fp8,0,0.01257600039243698
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,40,8,128,1,float16,float16,0,0.02274399995803833
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,40,8,128,1,float16,fp8,0,0.012495999783277511
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,40,8,128,1,fp8,fp8,0,0.012430399656295776
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,40,40,128,1,float16,fp8,0,0.010416000336408614
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,40,40,128,1,fp8,fp8,0,0.010398399829864503
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,40,1,128,1,float16,float16,0,0.018771199882030486
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,40,1,128,1,float16,fp8,0,0.010420800000429154
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,40,1,128,1,fp8,fp8,0,0.010375999659299851
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,40,2,128,1,float16,float16,0,0.018691200017929076
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,40,2,128,1,float16,fp8,0,0.010369600355625152
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,40,2,128,1,fp8,fp8,0,0.009337600320577621
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,40,4,128,1,float16,float16,0,0.018622399866580965
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,40,4,128,1,float16,fp8,0,0.010344000160694122
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,40,4,128,1,fp8,fp8,0,0.010471999645233154
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,40,8,128,1,float16,float16,0,0.018702399730682374
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,40,8,128,1,float16,fp8,0,0.009187199920415879
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,40,1,128,1,fp8,fp8,0,0.008441600203514098
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,40,8,128,1,fp8,fp8,0,0.010425599664449692
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,40,2,128,1,float16,fp8,0,0.008376000076532364
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,40,40,128,1,float16,float16,0,0.018593600392341612
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,40,40,128,1,float16,fp8,0,0.00846560001373291
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,40,40,128,1,fp8,fp8,0,0.010092800110578537
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,40,1,128,1,float16,float16,0,0.018700799345970152
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,40,1,128,1,float16,fp8,0,0.0104032002389431
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,40,2,128,1,float16,float16,0,0.01868959963321686
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,40,2,128,1,fp8,fp8,0,0.009270399808883667
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,40,4,128,1,float16,float16,0,0.018352000415325163
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,40,4,128,1,float16,fp8,0,0.008953599631786347
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,40,4,128,1,fp8,fp8,0,0.008900800347328186
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,40,8,128,1,float16,float16,0,0.018636800348758698
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,40,8,128,1,float16,fp8,0,0.009227199852466584
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,40,8,128,1,fp8,fp8,0,0.008532799780368805
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,32,1,128,1,float16,fp8,0,11.429129791259765
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,32,1,128,1,fp8,fp8,0,11.388983917236327
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,32,2,128,1,float16,fp8,0,11.438871765136719
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,32,2,128,1,fp8,fp8,0,11.455081939697266
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,32,1,128,1,float16,float16,0,14.457046508789062
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,32,2,128,1,float16,float16,0,14.596847534179688
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,32,4,128,1,float16,float16,0,14.893275451660156
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,32,4,128,1,float16,fp8,0,11.320267486572266
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,32,4,128,1,fp8,fp8,0,11.832681274414062
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,32,32,128,1,float16,fp8,0,5.896665573120117
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,32,8,128,1,float16,fp8,0,11.805937957763671
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,32,32,128,1,float16,float16,0,9.625204467773438
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,32,1,128,1,float16,float16,0,7.2817741394042965
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,32,8,128,1,fp8,fp8,0,11.873673248291016
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,32,32,128,1,fp8,fp8,0,5.862039947509766
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,32,8,128,1,float16,float16,0,16.203121948242188
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,32,1,128,1,float16,fp8,0,5.860843276977539
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,32,1,128,1,fp8,fp8,0,5.721974563598633
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,32,2,128,1,float16,fp8,0,5.737732696533203
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,32,4,128,1,float16,fp8,0,5.740436935424805
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,32,2,128,1,fp8,fp8,0,5.978369522094726
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,32,2,128,1,float16,float16,0,7.4455314636230465
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,32,4,128,1,float16,float16,0,7.6645965576171875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,32,4,128,1,fp8,fp8,0,5.927212905883789
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,32,32,128,1,float16,fp8,0,2.9935535430908202
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,32,32,128,1,fp8,fp8,0,3.3810863494873047
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,32,32,128,1,float16,float16,0,4.652118301391601
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,32,8,128,1,float16,fp8,0,5.828246307373047
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,32,1,128,1,float16,float16,0,3.775151824951172
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,32,8,128,1,fp8,fp8,0,5.892136001586914
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,32,1,128,1,float16,fp8,0,2.8530319213867186
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,32,8,128,1,float16,float16,0,7.777798461914062
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,32,1,128,1,fp8,fp8,0,2.9357295989990235
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,32,2,128,1,float16,float16,0,3.5304496765136717
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,32,2,128,1,fp8,fp8,0,2.9966720581054687
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,32,2,128,1,float16,fp8,0,3.3000064849853517
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,32,4,128,1,float16,fp8,0,2.883072090148926
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,32,4,128,1,float16,float16,0,3.7706783294677733
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,32,4,128,1,fp8,fp8,0,2.9336591720581056
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,32,8,128,1,float16,fp8,0,2.8720672607421873
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,32,32,128,1,float16,fp8,0,1.583790397644043
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,32,32,128,1,fp8,fp8,0,1.568723201751709
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,32,32,128,1,float16,float16,0,2.3845632553100584
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,32,8,128,1,float16,float16,0,4.072750473022461
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,32,1,128,1,float16,fp8,0,1.5134943962097167
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,32,1,128,1,fp8,fp8,0,1.4996383666992188
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,32,1,128,1,float16,float16,0,1.950472068786621
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,32,8,128,1,fp8,fp8,0,3.2337600708007814
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,32,2,128,1,float16,float16,0,1.797532844543457
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,32,2,128,1,float16,fp8,0,1.5175408363342284
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,32,2,128,1,fp8,fp8,0,1.5852479934692383
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,32,4,128,1,float16,fp8,0,1.4800736427307128
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,32,4,128,1,float16,float16,0,1.9106895446777343
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,32,4,128,1,fp8,fp8,0,1.4792384147644042
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,32,8,128,1,float16,fp8,0,1.5730159759521485
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,32,8,128,1,float16,float16,0,1.9341983795166016
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,32,8,128,1,fp8,fp8,0,1.7997503280639648
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,32,1,128,1,float16,fp8,0,6.690564727783203
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,32,1,128,1,fp8,fp8,0,6.698191833496094
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,32,2,128,1,float16,fp8,0,6.640481567382812
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,32,2,128,1,fp8,fp8,0,6.677207946777344
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,32,1,128,1,float16,float16,0,8.564556884765626
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,32,4,128,1,float16,fp8,0,6.6480354309082035
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,32,2,128,1,float16,float16,0,8.50650863647461
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,32,4,128,1,float16,float16,0,8.801815795898438
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,32,32,128,1,float16,fp8,0,3.460804748535156
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,32,32,128,1,fp8,fp8,0,3.710398483276367
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,32,4,128,1,fp8,fp8,0,6.763162994384766
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,32,8,128,1,float16,fp8,0,6.633902740478516
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,32,1,128,1,float16,float16,0,4.283851242065429
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,32,32,128,1,float16,float16,0,5.979705429077148
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,32,8,128,1,fp8,fp8,0,6.763790130615234
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,32,1,128,1,float16,fp8,0,3.378169631958008
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,32,8,128,1,float16,float16,0,9.339446258544921
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,32,1,128,1,fp8,fp8,0,3.352671813964844
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,32,2,128,1,float16,fp8,0,3.332340621948242
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,32,2,128,1,float16,float16,0,4.158687973022461
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,32,4,128,1,float16,fp8,0,3.2980415344238283
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,32,2,128,1,fp8,fp8,0,3.9341087341308594
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,32,4,128,1,float16,float16,0,4.3035633087158205
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,32,4,128,1,fp8,fp8,0,3.4083118438720703
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,32,32,128,1,float16,fp8,0,1.7686399459838866
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,32,8,128,1,float16,fp8,0,3.437571334838867
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,32,32,128,1,fp8,fp8,0,1.765123176574707
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,32,8,128,1,float16,float16,0,4.781227111816406
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,32,1,128,1,float16,fp8,0,1.7025007247924804
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,32,8,128,1,fp8,fp8,0,3.351795196533203
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,32,32,128,1,float16,float16,0,3.3782928466796873
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,32,1,128,1,float16,float16,0,2.2098031997680665
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,32,1,128,1,fp8,fp8,0,1.6752159118652343
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,32,2,128,1,float16,fp8,0,1.6669904708862304
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,32,2,128,1,float16,float16,0,2.05303840637207
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,32,2,128,1,fp8,fp8,0,2.021659278869629
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,32,4,128,1,float16,fp8,0,1.7366527557373046
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,32,4,128,1,fp8,fp8,0,1.7107183456420898
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,32,4,128,1,float16,float16,0,2.060531234741211
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,32,8,128,1,float16,fp8,0,1.6998144149780274
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,32,8,128,1,float16,float16,0,2.2039072036743166
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,32,8,128,1,fp8,fp8,0,1.6764543533325196
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,32,32,128,1,float16,fp8,0,0.9261792182922364
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,32,32,128,1,fp8,fp8,0,0.9245167732238769
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,32,1,128,1,float16,float16,0,1.1300496101379394
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,32,1,128,1,float16,fp8,0,0.9187791824340821
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,32,32,128,1,float16,float16,0,1.5068943977355957
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,32,1,128,1,fp8,fp8,0,1.0730832099914551
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,32,2,128,1,float16,float16,0,1.0323951721191407
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,32,2,128,1,float16,fp8,0,0.8771007537841797
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,32,2,128,1,fp8,fp8,0,0.8785615921020508
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,32,4,128,1,float16,fp8,0,0.9019087791442871
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,32,4,128,1,float16,float16,0,1.0868304252624512
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,32,4,128,1,fp8,fp8,0,0.936739158630371
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,32,8,128,1,float16,fp8,0,1.0024880409240722
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,32,8,128,1,fp8,fp8,0,0.8750224113464355
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,32,8,128,1,float16,float16,0,1.2675567626953126
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,32,1,128,1,float16,fp8,0,4.67296142578125
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,32,1,128,1,fp8,fp8,0,4.6872303009033205
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,32,2,128,1,float16,fp8,0,4.669009780883789
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,32,2,128,1,fp8,fp8,0,4.641270446777344
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,32,1,128,1,float16,float16,0,5.899843215942383
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,32,2,128,1,float16,float16,0,6.024494552612305
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,32,4,128,1,float16,float16,0,6.168513488769531
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,32,4,128,1,float16,fp8,0,4.615481567382813
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,32,32,128,1,float16,fp8,0,2.766257667541504
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,32,4,128,1,fp8,fp8,0,4.726286315917969
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,32,32,128,1,fp8,fp8,0,2.757595252990723
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,32,32,128,1,float16,float16,0,4.383582305908203
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,32,8,128,1,float16,fp8,0,4.759715270996094
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,32,8,128,1,fp8,fp8,0,4.6973918914794925
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,32,1,128,1,float16,float16,0,2.7160112380981447
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,32,8,128,1,float16,float16,0,6.61373291015625
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,32,1,128,1,float16,fp8,0,2.328745651245117
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,32,1,128,1,fp8,fp8,0,2.382846450805664
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,32,2,128,1,float16,fp8,0,2.383782386779785
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,32,2,128,1,fp8,fp8,0,2.367361640930176
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,32,2,128,1,float16,float16,0,2.8822784423828125
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,32,4,128,1,float16,fp8,0,2.616969680786133
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,32,4,128,1,float16,float16,0,2.9543888092041017
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,32,4,128,1,fp8,fp8,0,2.547809600830078
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,32,8,128,1,float16,fp8,0,2.3420591354370117
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,32,32,128,1,float16,fp8,0,1.6347759246826172
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,32,32,128,1,fp8,fp8,0,1.2871343612670898
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,32,8,128,1,float16,float16,0,3.201764678955078
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,32,8,128,1,fp8,fp8,0,2.3948863983154296
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,32,32,128,1,float16,float16,0,2.150979232788086
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,32,1,128,1,float16,float16,0,1.590169620513916
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,32,1,128,1,float16,fp8,0,1.2555248260498046
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,32,1,128,1,fp8,fp8,0,1.2089792251586915
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,32,2,128,1,float16,fp8,0,1.1861215591430665
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,32,2,128,1,float16,float16,0,1.4707344055175782
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,32,2,128,1,fp8,fp8,0,1.292961597442627
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,32,4,128,1,float16,fp8,0,1.211246395111084
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,32,4,128,1,float16,float16,0,1.4744607925415039
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,32,4,128,1,fp8,fp8,0,1.3063232421875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,32,8,128,1,float16,fp8,0,1.1937199592590333
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,32,8,128,1,float16,float16,0,1.562286376953125
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,32,32,128,1,float16,fp8,0,0.6849232196807862
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,32,32,128,1,fp8,fp8,0,0.6806623935699463
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,32,32,128,1,float16,float16,0,1.1527664184570312
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,32,8,128,1,fp8,fp8,0,1.1854063987731933
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,32,1,128,1,float16,float16,0,0.8405039787292481
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,32,1,128,1,fp8,fp8,0,0.6375247955322265
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,32,1,128,1,float16,fp8,0,0.6825391769409179
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,32,2,128,1,float16,fp8,0,0.6402592182159423
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,32,2,128,1,float16,float16,0,0.8314895629882812
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,32,4,128,1,float16,fp8,0,0.6423279762268066
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,32,2,128,1,fp8,fp8,0,0.6354591846466064
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,32,4,128,1,float16,float16,0,0.8280927658081054
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,32,4,128,1,fp8,fp8,0,0.7497568130493164
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,32,8,128,1,float16,fp8,0,0.6373760223388671
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,32,8,128,1,float16,float16,0,0.8197919845581054
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,32,8,128,1,fp8,fp8,0,0.6347472190856933
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,32,1,128,1,float16,fp8,0,6.105185699462891
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,32,1,128,1,fp8,fp8,0,6.076628875732422
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,32,2,128,1,fp8,fp8,0,6.040998458862305
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,32,2,128,1,float16,fp8,0,6.0879871368408205
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,32,4,128,1,float16,fp8,0,6.13487663269043
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,32,1,128,1,float16,float16,0,7.617945861816406
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,32,2,128,1,float16,float16,0,7.691275024414063
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,32,4,128,1,float16,float16,0,8.116496276855468
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,32,32,128,1,float16,fp8,0,3.3973888397216796
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,32,32,128,1,fp8,fp8,0,3.6904319763183593
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,32,4,128,1,fp8,fp8,0,6.19880485534668
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,32,1,128,1,float16,float16,0,3.6615665435791014
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,32,8,128,1,float16,fp8,0,6.13995361328125
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,32,8,128,1,fp8,fp8,0,6.176334381103516
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,32,32,128,1,float16,float16,0,6.02022705078125
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,32,1,128,1,float16,fp8,0,3.0801023483276366
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,32,8,128,1,float16,float16,0,8.827278137207031
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,32,1,128,1,fp8,fp8,0,3.0721120834350586
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,32,2,128,1,fp8,fp8,0,3.0855295181274416
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,32,2,128,1,float16,float16,0,3.754665756225586
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,32,2,128,1,float16,fp8,0,3.403247833251953
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,32,4,128,1,float16,fp8,0,3.1403423309326173
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,32,4,128,1,float16,float16,0,4.0909423828125
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,32,4,128,1,fp8,fp8,0,3.0581104278564455
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,32,8,128,1,float16,fp8,0,3.115620803833008
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,32,32,128,1,float16,fp8,0,1.6821903228759765
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,32,8,128,1,float16,float16,0,4.369222259521484
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,32,8,128,1,fp8,fp8,0,3.0867935180664063
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,32,1,128,1,float16,float16,0,1.8534479141235352
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,32,32,128,1,fp8,fp8,0,2.056078338623047
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,32,32,128,1,float16,float16,0,3.3199214935302734
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,32,1,128,1,float16,fp8,0,1.579651165008545
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,32,1,128,1,fp8,fp8,0,1.5390560150146484
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,32,2,128,1,float16,fp8,0,1.572430419921875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,32,2,128,1,float16,float16,0,1.8991920471191406
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,32,2,128,1,fp8,fp8,0,1.9028047561645507
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,32,4,128,1,float16,fp8,0,1.6058416366577148
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,32,4,128,1,fp8,fp8,0,1.5290911674499512
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,32,4,128,1,float16,float16,0,1.9439247131347657
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,32,8,128,1,float16,float16,0,2.0598480224609377
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,32,32,128,1,fp8,fp8,0,0.8792160034179688
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,32,8,128,1,float16,fp8,0,1.5783056259155273
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,32,1,128,1,float16,float16,0,0.9238127708435059
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,32,32,128,1,float16,fp8,0,1.0752191543579102
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,32,1,128,1,float16,fp8,0,0.9713104248046875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,32,32,128,1,float16,float16,0,1.535580825805664
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,32,1,128,1,fp8,fp8,0,0.9108336448669434
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,32,8,128,1,fp8,fp8,0,2.0131856918334963
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,32,2,128,1,float16,fp8,0,0.8056719779968262
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,32,2,128,1,fp8,fp8,0,0.7986080169677734
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,32,2,128,1,float16,float16,0,0.9458080291748047
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,32,4,128,1,float16,float16,0,0.9952431678771972
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,32,4,128,1,float16,fp8,0,0.9366767883300782
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,32,4,128,1,fp8,fp8,0,0.7929215908050538
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,32,8,128,1,float16,fp8,0,0.812889575958252
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,32,8,128,1,float16,float16,0,1.0809920310974122
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,32,32,128,1,float16,fp8,0,0.46281919479370115
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,32,8,128,1,fp8,fp8,0,0.8030672073364258
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,32,32,128,1,fp8,fp8,0,0.4734928131103516
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,32,32,128,1,float16,float16,0,0.7985455989837646
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,32,1,128,1,float16,float16,0,0.5183856010437011
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,32,1,128,1,float16,fp8,0,0.43086719512939453
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,32,1,128,1,fp8,fp8,0,0.43692641258239745
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,32,2,128,1,float16,float16,0,0.5155695915222168
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,32,2,128,1,float16,fp8,0,0.43211679458618163
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,32,2,128,1,fp8,fp8,0,0.4324927806854248
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,32,4,128,1,float16,float16,0,0.5247568130493164
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,32,8,128,1,float16,float16,0,0.5627647876739502
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,32,4,128,1,float16,fp8,0,0.4309663772583008
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,32,4,128,1,fp8,fp8,0,0.45185117721557616
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,32,8,128,1,float16,fp8,0,0.4324160099029541
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,32,8,128,1,fp8,fp8,0,0.48439998626708985
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,32,1,128,1,float16,fp8,0,3.5057376861572265
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,32,1,128,1,fp8,fp8,0,3.521139144897461
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,32,2,128,1,float16,fp8,0,3.466843032836914
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,32,1,128,1,float16,float16,0,4.355868911743164
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,32,2,128,1,fp8,fp8,0,3.4598464965820312
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,32,4,128,1,float16,fp8,0,3.5641502380371093
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,32,2,128,1,float16,float16,0,4.538092803955078
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,32,4,128,1,float16,float16,0,4.605750274658203
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,32,32,128,1,float16,fp8,0,1.9922239303588867
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,32,4,128,1,fp8,fp8,0,3.563158416748047
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,32,32,128,1,fp8,fp8,0,2.296721649169922
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,32,8,128,1,float16,fp8,0,3.5982688903808593
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,32,1,128,1,float16,float16,0,2.246193695068359
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,32,32,128,1,float16,float16,0,3.894804763793945
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,32,8,128,1,fp8,fp8,0,4.0023151397705075
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,32,8,128,1,float16,float16,0,5.131387329101562
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,32,1,128,1,float16,fp8,0,1.7685871124267578
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,32,1,128,1,fp8,fp8,0,1.8031791687011718
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,32,2,128,1,float16,fp8,0,1.8202447891235352
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,32,2,128,1,float16,float16,0,2.1992448806762694
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,32,2,128,1,fp8,fp8,0,2.0236543655395507
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,32,4,128,1,float16,fp8,0,1.798089599609375
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,32,4,128,1,float16,float16,0,2.2909040451049805
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,32,4,128,1,fp8,fp8,0,1.9531631469726562
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,32,8,128,1,float16,fp8,0,1.7914911270141602
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,32,32,128,1,float16,fp8,0,1.0513168334960938
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,32,8,128,1,float16,float16,0,2.54005126953125
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,32,8,128,1,fp8,fp8,0,1.762303924560547
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,32,32,128,1,fp8,fp8,0,1.0201423645019532
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,32,1,128,1,float16,fp8,0,0.9298784255981445
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,32,1,128,1,float16,float16,0,1.0992431640625
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,32,32,128,1,float16,float16,0,2.2385536193847657
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,32,1,128,1,fp8,fp8,0,0.9225104331970215
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,32,2,128,1,float16,float16,0,1.0768752098083496
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,32,2,128,1,float16,fp8,0,1.042091178894043
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,32,2,128,1,fp8,fp8,0,0.9390015602111816
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,32,4,128,1,float16,float16,0,1.1491264343261718
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,32,4,128,1,float16,fp8,0,0.9191264152526856
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,32,4,128,1,fp8,fp8,0,1.0749279975891113
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,32,32,128,1,float16,fp8,0,0.5230735778808594
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,32,8,128,1,float16,fp8,0,0.9057215690612793
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,32,8,128,1,float16,float16,0,1.2755999565124512
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,32,8,128,1,fp8,fp8,0,1.0487376213073731
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,32,32,128,1,fp8,fp8,0,0.5745952129364014
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,32,1,128,1,float16,float16,0,0.5537631988525391
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,32,1,128,1,fp8,fp8,0,0.48119359016418456
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,32,32,128,1,float16,float16,0,1.0964240074157714
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,32,1,128,1,float16,fp8,0,0.521830415725708
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,32,2,128,1,float16,float16,0,0.5639071941375733
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,32,4,128,1,float16,float16,0,0.6039567947387695
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,32,2,128,1,float16,fp8,0,0.5378096103668213
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,32,2,128,1,fp8,fp8,0,0.511291217803955
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,32,4,128,1,float16,fp8,0,0.47946557998657224
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,32,4,128,1,fp8,fp8,0,0.4787583827972412
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,32,8,128,1,fp8,fp8,0,0.5020224094390869
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,32,32,128,1,float16,fp8,0,0.29796159267425537
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,32,8,128,1,float16,float16,0,0.6567967891693115
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,32,8,128,1,float16,fp8,0,0.491487979888916
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,32,32,128,1,float16,float16,0,0.5541935920715332
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,32,32,128,1,fp8,fp8,0,0.2879040002822876
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,32,1,128,1,float16,float16,0,0.3019007921218872
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,32,1,128,1,float16,fp8,0,0.2698863983154297
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,32,1,128,1,fp8,fp8,0,0.2666368007659912
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,32,2,128,1,float16,float16,0,0.31157920360565183
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,32,2,128,1,float16,fp8,0,0.2676095962524414
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,32,2,128,1,fp8,fp8,0,0.2680943965911865
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,32,4,128,1,float16,float16,0,0.32552640438079833
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,32,4,128,1,float16,fp8,0,0.26536800861358645
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,32,4,128,1,fp8,fp8,0,0.2683759927749634
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,32,8,128,1,float16,float16,0,0.3550879955291748
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,32,8,128,1,float16,fp8,0,0.2673104047775269
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,32,8,128,1,fp8,fp8,0,0.2667263984680176
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,32,1,128,1,float16,fp8,0,3.3278369903564453
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,32,1,128,1,fp8,fp8,0,3.3163055419921874
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,32,2,128,1,float16,fp8,0,3.3143455505371096
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,32,1,128,1,float16,float16,0,4.136598587036133
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,32,2,128,1,fp8,fp8,0,3.3741409301757814
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,32,2,128,1,float16,float16,0,4.260547256469726
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,32,4,128,1,float16,float16,0,4.460137557983399
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,32,4,128,1,float16,fp8,0,3.338052749633789
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,32,4,128,1,fp8,fp8,0,3.3823455810546874
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,32,32,128,1,float16,fp8,0,2.163947105407715
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,32,8,128,1,float16,fp8,0,3.377385711669922
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,32,1,128,1,float16,float16,0,2.0011104583740233
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,32,32,128,1,fp8,fp8,0,2.166788864135742
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,32,8,128,1,fp8,fp8,0,3.891948699951172
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,32,32,128,1,float16,float16,0,4.393668746948242
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,32,8,128,1,float16,float16,0,5.176827239990234
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,32,1,128,1,float16,fp8,0,1.7465103149414063
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,32,1,128,1,fp8,fp8,0,1.7771808624267578
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,32,2,128,1,fp8,fp8,0,1.705575942993164
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,32,2,128,1,float16,float16,0,2.112411117553711
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,32,2,128,1,float16,fp8,0,1.9601951599121095
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,32,4,128,1,float16,float16,0,2.246460723876953
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,32,4,128,1,float16,fp8,0,1.6674991607666017
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,32,4,128,1,fp8,fp8,0,1.8176128387451171
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,32,8,128,1,float16,fp8,0,1.705881690979004
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,32,32,128,1,float16,fp8,0,0.9969120025634766
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,32,8,128,1,fp8,fp8,0,1.700155258178711
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,32,8,128,1,float16,float16,0,2.5443119049072265
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,32,1,128,1,float16,float16,0,0.9915663719177246
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,32,1,128,1,float16,fp8,0,0.8873456001281739
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,32,32,128,1,fp8,fp8,0,1.158083152770996
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,32,1,128,1,fp8,fp8,0,0.8746975898742676
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,32,2,128,1,float16,float16,0,1.0301615715026855
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,32,32,128,1,float16,float16,0,2.443079948425293
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,32,2,128,1,float16,fp8,0,0.8927087783813477
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,32,2,128,1,fp8,fp8,0,0.961673641204834
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,32,4,128,1,float16,fp8,0,0.867182445526123
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,32,4,128,1,float16,float16,0,1.1586112022399901
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,32,4,128,1,fp8,fp8,0,0.9795536041259766
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,32,8,128,1,float16,fp8,0,0.9911328315734863
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,32,32,128,1,float16,fp8,0,0.5277567863464355
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,32,8,128,1,float16,float16,0,1.2751328468322753
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,32,8,128,1,fp8,fp8,0,0.8576736450195312
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,32,32,128,1,fp8,fp8,0,0.5009247779846191
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,32,1,128,1,float16,float16,0,0.5256991863250733
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,32,32,128,1,float16,float16,0,1.131980800628662
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,32,1,128,1,float16,fp8,0,0.49100961685180666
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,32,1,128,1,fp8,fp8,0,0.5112095832824707
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,32,2,128,1,float16,float16,0,0.5395088195800781
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,32,2,128,1,float16,fp8,0,0.4512639999389648
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,32,2,128,1,fp8,fp8,0,0.4474480152130127
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,32,4,128,1,float16,float16,0,0.5721375942230225
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,32,4,128,1,float16,fp8,0,0.47839040756225587
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,32,4,128,1,fp8,fp8,0,0.4480559825897217
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,32,8,128,1,float16,fp8,0,0.4815807819366455
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,32,32,128,1,float16,float16,0,0.5871776103973388
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,32,8,128,1,float16,float16,0,0.6606704235076905
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,32,8,128,1,fp8,fp8,0,0.4519536018371582
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,32,1,128,1,fp8,fp8,0,0.24408318996429443
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,32,32,128,1,float16,fp8,0,0.2715359926223755
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,32,32,128,1,fp8,fp8,0,0.2709104061126709
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,32,1,128,1,float16,float16,0,0.28606879711151123
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,32,1,128,1,float16,fp8,0,0.24285440444946288
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,32,2,128,1,float16,float16,0,0.28964641094207766
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,32,2,128,1,float16,fp8,0,0.24383840560913086
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,32,8,128,1,float16,float16,0,0.34957759380340575
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,32,2,128,1,fp8,fp8,0,0.24401600360870362
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,32,4,128,1,float16,float16,0,0.3089024066925049
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,32,32,128,1,float16,fp8,0,0.15690239667892455
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,32,4,128,1,float16,fp8,0,0.24434239864349366
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,32,4,128,1,fp8,fp8,0,0.24270238876342773
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,32,8,128,1,float16,fp8,0,0.24437599182128905
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,32,8,128,1,fp8,fp8,0,0.24572958946228027
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,32,32,128,1,float16,float16,0,0.3168463945388794
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,32,32,128,1,fp8,fp8,0,0.15642240047454833
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,32,1,128,1,float16,fp8,0,0.14164960384368896
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,32,1,128,1,float16,float16,0,0.15854719877243043
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,32,1,128,1,fp8,fp8,0,0.14188319444656372
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,32,2,128,1,float16,float16,0,0.16320639848709106
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,32,2,128,1,fp8,fp8,0,0.1429152011871338
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,32,2,128,1,float16,fp8,0,0.14218560457229615
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,32,4,128,1,float16,float16,0,0.1704319953918457
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,32,4,128,1,float16,fp8,0,0.14200960397720336
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,32,4,128,1,fp8,fp8,0,0.14113919734954833
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,32,8,128,1,float16,float16,0,0.19294240474700927
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,32,8,128,1,float16,fp8,0,0.1422160029411316
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,32,8,128,1,fp8,fp8,0,0.1429136037826538
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,32,1,128,1,float16,fp8,0,1.9896368026733398
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,32,1,128,1,fp8,fp8,0,1.9934255599975585
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,32,1,128,1,float16,float16,0,2.3875167846679686
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,32,2,128,1,float16,fp8,0,1.9910591125488282
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,32,2,128,1,fp8,fp8,0,1.9889711380004882
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,32,2,128,1,float16,float16,0,2.461084747314453
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,32,4,128,1,float16,float16,0,2.700724792480469
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,32,4,128,1,float16,fp8,0,2.0105264663696287
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,32,4,128,1,fp8,fp8,0,2.0017391204833985
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,32,32,128,1,float16,fp8,0,1.3932559967041016
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,32,8,128,1,float16,fp8,0,1.9951391220092773
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,32,8,128,1,fp8,fp8,0,1.9913536071777345
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,32,32,128,1,fp8,fp8,0,1.283505630493164
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,32,1,128,1,float16,fp8,0,1.0239423751831054
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,32,1,128,1,float16,float16,0,1.1860063552856446
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,32,1,128,1,fp8,fp8,0,1.0767855644226074
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,32,32,128,1,float16,float16,0,3.0085968017578124
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,32,8,128,1,float16,float16,0,3.5359840393066406
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,32,2,128,1,float16,float16,0,1.2332127571105957
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,32,2,128,1,float16,fp8,0,1.0348480224609375
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,32,2,128,1,fp8,fp8,0,1.0656991958618165
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,32,4,128,1,float16,fp8,0,1.1164591789245606
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,32,4,128,1,fp8,fp8,0,1.0256447792053223
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,32,4,128,1,float16,float16,0,1.355959987640381
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,32,32,128,1,float16,fp8,0,0.6256415843963623
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,32,8,128,1,float16,fp8,0,1.050819206237793
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,32,8,128,1,fp8,fp8,0,1.1395808219909669
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,32,8,128,1,float16,float16,0,1.5926992416381835
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,32,1,128,1,float16,float16,0,0.6013887882232666
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,32,32,128,1,fp8,fp8,0,0.7096144199371338
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,32,1,128,1,float16,fp8,0,0.5650944232940673
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,32,1,128,1,fp8,fp8,0,0.5232160091400146
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,32,32,128,1,float16,float16,0,1.5154784202575684
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,32,2,128,1,float16,float16,0,0.6361663818359375
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,32,2,128,1,float16,fp8,0,0.5497151851654053
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,32,2,128,1,fp8,fp8,0,0.5211376190185547
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,32,4,128,1,float16,fp8,0,0.5477519989013672
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,32,4,128,1,float16,float16,0,0.689731216430664
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,32,4,128,1,fp8,fp8,0,0.5497263908386231
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,32,8,128,1,float16,fp8,0,0.5214640140533447
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,32,8,128,1,fp8,fp8,0,0.5349279880523682
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,32,8,128,1,float16,float16,0,0.8093343734741211
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,32,32,128,1,float16,fp8,0,0.32090559005737307
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,32,32,128,1,fp8,fp8,0,0.3258687973022461
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,32,32,128,1,float16,float16,0,0.7831583976745605
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,32,1,128,1,float16,float16,0,0.31910080909729005
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,32,1,128,1,float16,fp8,0,0.2765520095825195
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,32,1,128,1,fp8,fp8,0,0.2767535924911499
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,32,2,128,1,float16,float16,0,0.3393280029296875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,32,2,128,1,float16,fp8,0,0.29556319713592527
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,32,2,128,1,fp8,fp8,0,0.27784481048583987
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,32,4,128,1,float16,float16,0,0.3641520023345947
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,32,4,128,1,float16,fp8,0,0.2771712064743042
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,32,4,128,1,fp8,fp8,0,0.27748000621795654
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,32,8,128,1,float16,float16,0,0.42087841033935547
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,32,8,128,1,float16,fp8,0,0.2766736030578613
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,32,8,128,1,fp8,fp8,0,0.2788847923278809
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,32,32,128,1,float16,fp8,0,0.17573120594024658
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,32,32,128,1,float16,float16,0,0.41751837730407715
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,32,32,128,1,fp8,fp8,0,0.17547359466552734
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,32,1,128,1,float16,float16,0,0.18158400058746338
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,32,1,128,1,float16,fp8,0,0.1545456051826477
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,32,1,128,1,fp8,fp8,0,0.15413440465927125
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,32,2,128,1,float16,float16,0,0.18801599740982056
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,32,2,128,1,float16,fp8,0,0.15413600206375122
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,32,2,128,1,fp8,fp8,0,0.1540303945541382
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,32,4,128,1,float16,float16,0,0.20667359828948975
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,32,4,128,1,float16,fp8,0,0.15426559448242189
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,32,4,128,1,fp8,fp8,0,0.1546623945236206
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,32,8,128,1,float16,float16,0,0.23299360275268555
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,32,8,128,1,float16,fp8,0,0.15467360019683837
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,32,8,128,1,fp8,fp8,0,0.15443040132522584
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,32,32,128,1,float16,float16,0,0.22649118900299073
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,32,32,128,1,float16,fp8,0,0.10364960432052613
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,32,32,128,1,fp8,fp8,0,0.10550240278244019
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,32,1,128,1,float16,float16,0,0.10684959888458252
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,32,1,128,1,float16,fp8,0,0.09399999976158142
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,32,1,128,1,fp8,fp8,0,0.09385280013084411
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,32,2,128,1,float16,float16,0,0.11155040264129638
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,32,2,128,1,float16,fp8,0,0.09415680170059204
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,32,2,128,1,fp8,fp8,0,0.09363679885864258
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,32,4,128,1,float16,float16,0,0.11665120124816894
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,32,4,128,1,float16,fp8,0,0.09454240202903748
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,32,4,128,1,fp8,fp8,0,0.09357759952545167
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,32,8,128,1,float16,float16,0,0.12815040349960327
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,32,8,128,1,float16,fp8,0,0.09394720196723938
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,32,8,128,1,fp8,fp8,0,0.09362400174140931
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,32,1,128,1,float16,fp8,0,1.986123275756836
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,32,1,128,1,fp8,fp8,0,1.9867088317871093
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,32,1,128,1,float16,float16,0,2.380891227722168
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,32,2,128,1,float16,fp8,0,1.9869792938232422
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,32,2,128,1,fp8,fp8,0,1.989918327331543
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,32,2,128,1,float16,float16,0,2.5316287994384767
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,32,4,128,1,float16,fp8,0,1.9875568389892577
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,32,4,128,1,float16,float16,0,2.8118528366088866
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,32,4,128,1,fp8,fp8,0,1.99597110748291
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,32,32,128,1,float16,fp8,0,1.2483263969421388
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,32,8,128,1,float16,fp8,0,2.002697563171387
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,32,8,128,1,fp8,fp8,0,2.002662467956543
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,32,32,128,1,fp8,fp8,0,1.3355695724487304
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,32,1,128,1,float16,float16,0,1.1713312149047852
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,32,8,128,1,float16,float16,0,3.4413406372070314
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,32,1,128,1,float16,fp8,0,1.01692476272583
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,32,1,128,1,fp8,fp8,0,1.0242048263549806
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,32,2,128,1,float16,float16,0,1.2465184211730957
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,32,2,128,1,float16,fp8,0,1.035108757019043
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,32,32,128,1,float16,float16,0,3.8219535827636717
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,32,2,128,1,fp8,fp8,0,1.0082287788391113
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,32,4,128,1,float16,fp8,0,1.056217575073242
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,32,4,128,1,fp8,fp8,0,1.0080672264099122
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,32,4,128,1,float16,float16,0,1.4465472221374511
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,32,8,128,1,float16,fp8,0,1.095259189605713
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,32,8,128,1,fp8,fp8,0,1.008011245727539
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,32,32,128,1,float16,fp8,0,0.6213679790496827
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,32,8,128,1,float16,float16,0,1.7333328247070312
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,32,32,128,1,fp8,fp8,0,0.6274223804473877
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,32,1,128,1,float16,float16,0,0.5931295871734619
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,32,1,128,1,float16,fp8,0,0.5353600025177002
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,32,1,128,1,fp8,fp8,0,0.5329999923706055
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,32,2,128,1,float16,float16,0,0.632097578048706
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,32,2,128,1,float16,fp8,0,0.5149951934814453
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,32,2,128,1,fp8,fp8,0,0.5284160137176513
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,32,4,128,1,float16,fp8,0,0.5159071922302246
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,32,32,128,1,float16,float16,0,1.8168912887573243
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,32,4,128,1,float16,float16,0,0.7138288021087646
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,32,8,128,1,float16,fp8,0,0.5165696144104004
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,32,32,128,1,float16,fp8,0,0.3239903926849365
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,32,8,128,1,fp8,fp8,0,0.5154831886291504
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,32,8,128,1,float16,float16,0,0.874351978302002
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,32,32,128,1,fp8,fp8,0,0.32456159591674805
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,32,4,128,1,fp8,fp8,0,0.5199168205261231
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,32,1,128,1,float16,float16,0,0.31002719402313234
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,32,1,128,1,float16,fp8,0,0.27106080055236814
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,32,1,128,1,fp8,fp8,0,0.2694927930831909
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,32,32,128,1,float16,float16,0,0.925875186920166
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,32,4,128,1,float16,float16,0,0.3707103967666626
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,32,2,128,1,float16,float16,0,0.3292543888092041
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,32,2,128,1,float16,fp8,0,0.2704576015472412
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,32,2,128,1,fp8,fp8,0,0.2719199895858765
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,32,8,128,1,fp8,fp8,0,0.2709903955459595
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,32,4,128,1,float16,fp8,0,0.2704432010650635
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,32,4,128,1,fp8,fp8,0,0.2704511880874634
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,32,8,128,1,float16,fp8,0,0.2708143949508667
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,32,8,128,1,float16,float16,0,0.4476799964904785
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,32,32,128,1,float16,fp8,0,0.17544000148773192
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,32,32,128,1,float16,float16,0,0.47746877670288085
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,32,32,128,1,fp8,fp8,0,0.17663040161132812
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,32,1,128,1,float16,float16,0,0.17519359588623046
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,32,1,128,1,float16,fp8,0,0.14732160568237304
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,32,1,128,1,fp8,fp8,0,0.14819999933242797
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,32,2,128,1,float16,float16,0,0.18269599676132203
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,32,2,128,1,float16,fp8,0,0.14753439426422119
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,32,2,128,1,fp8,fp8,0,0.14796960353851318
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,32,4,128,1,float16,float16,0,0.20329599380493163
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,32,4,128,1,float16,fp8,0,0.14920320510864257
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,32,4,128,1,fp8,fp8,0,0.14797439575195312
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,32,8,128,1,float16,float16,0,0.24190239906311034
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,32,8,128,1,float16,fp8,0,0.14903359413146972
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,32,8,128,1,fp8,fp8,0,0.14904639720916749
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,32,32,128,1,float16,float16,0,0.25706241130828855
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,32,32,128,1,float16,fp8,0,0.10122560262680054
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,32,32,128,1,fp8,fp8,0,0.1009935975074768
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,32,2,128,1,fp8,fp8,0,0.08666080236434937
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,32,1,128,1,float16,float16,0,0.10175360441207885
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,32,4,128,1,float16,fp8,0,0.08637760281562805
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,32,1,128,1,float16,fp8,0,0.08666399717330933
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,32,1,128,1,fp8,fp8,0,0.08672320246696472
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,32,2,128,1,float16,float16,0,0.10590560436248779
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,32,2,128,1,float16,fp8,0,0.08668799996376038
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,32,4,128,1,float16,float16,0,0.11555680036544799
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,32,4,128,1,fp8,fp8,0,0.08698239922523499
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,32,8,128,1,float16,float16,0,0.13662240505218506
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,32,8,128,1,float16,fp8,0,0.0861519992351532
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,32,1,128,1,float16,fp8,0,0.055555200576782225
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,32,8,128,1,fp8,fp8,0,0.08705919981002808
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,32,32,128,1,float16,float16,0,0.1432800054550171
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,32,32,128,1,float16,fp8,0,0.061582398414611814
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,32,32,128,1,fp8,fp8,0,0.06189119815826416
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,32,4,128,1,float16,fp8,0,0.05535039901733398
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,32,1,128,1,float16,float16,0,0.06966879963874817
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,32,1,128,1,fp8,fp8,0,0.05557439923286438
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,32,2,128,1,float16,float16,0,0.0695680022239685
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,32,2,128,1,float16,fp8,0,0.05552319884300232
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,32,2,128,1,fp8,fp8,0,0.05558879971504212
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,32,4,128,1,float16,float16,0,0.07329760193824768
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,32,4,128,1,fp8,fp8,0,0.055473601818084715
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,32,8,128,1,float16,float16,0,0.0806335985660553
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,32,8,128,1,float16,fp8,0,0.055471998453140256
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,32,8,128,1,fp8,fp8,0,0.05554720163345337
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,32,1,128,1,float16,fp8,0,1.2499247550964356
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,32,1,128,1,float16,float16,0,1.4398880004882812
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,32,1,128,1,fp8,fp8,0,1.248840045928955
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,32,2,128,1,float16,fp8,0,1.2508399963378907
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,32,2,128,1,float16,float16,0,1.564628791809082
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,32,2,128,1,fp8,fp8,0,1.2507663726806642
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,32,4,128,1,float16,fp8,0,1.2497568130493164
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,32,4,128,1,float16,float16,0,1.796776008605957
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,32,4,128,1,fp8,fp8,0,1.3208239555358887
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,32,8,128,1,float16,fp8,0,1.249852752685547
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,32,32,128,1,float16,fp8,0,0.7947743892669678
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,32,8,128,1,fp8,fp8,0,1.3004480361938477
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,32,32,128,1,fp8,fp8,0,0.8144399642944335
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,32,8,128,1,float16,float16,0,2.293592071533203
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,32,1,128,1,float16,float16,0,0.7324607849121094
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,32,1,128,1,float16,fp8,0,0.6410768032073975
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,32,1,128,1,fp8,fp8,0,0.6350895881652832
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,32,2,128,1,float16,float16,0,0.795195198059082
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,32,32,128,1,float16,float16,0,2.556718444824219
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,32,2,128,1,float16,fp8,0,0.6381360054016113
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,32,2,128,1,fp8,fp8,0,0.6444511890411377
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,32,4,128,1,float16,fp8,0,0.6385471820831299
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,32,4,128,1,fp8,fp8,0,0.6354959964752197
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,32,4,128,1,float16,float16,0,0.9168543815612793
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,32,8,128,1,float16,fp8,0,0.6360608100891113
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,32,8,128,1,fp8,fp8,0,0.6366144180297851
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,32,32,128,1,float16,fp8,0,0.4087088108062744
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,32,32,128,1,fp8,fp8,0,0.40869760513305664
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,32,1,128,1,float16,float16,0,0.37992000579833984
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,32,8,128,1,float16,float16,0,1.1536128044128418
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,32,1,128,1,float16,fp8,0,0.32835519313812256
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,32,1,128,1,fp8,fp8,0,0.3285056114196777
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,32,2,128,1,float16,float16,0,0.40716800689697263
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,32,4,128,1,float16,float16,0,0.4648831844329834
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,32,4,128,1,float16,fp8,0,0.328222393989563
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,32,4,128,1,fp8,fp8,0,0.32822399139404296
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,32,32,128,1,float16,float16,0,1.2961296081542968
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,32,2,128,1,float16,fp8,0,0.32924480438232423
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,32,8,128,1,fp8,fp8,0,0.3288144111633301
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,32,2,128,1,fp8,fp8,0,0.32899200916290283
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,32,8,128,1,float16,fp8,0,0.32844159603118894
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,32,8,128,1,float16,float16,0,0.5877984046936036
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,32,32,128,1,float16,fp8,0,0.21488480567932128
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,32,32,128,1,fp8,fp8,0,0.2163327932357788
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,32,2,128,1,float16,fp8,0,0.17542879581451415
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,32,2,128,1,fp8,fp8,0,0.17469760179519653
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,32,1,128,1,float16,float16,0,0.2066431999206543
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,32,32,128,1,float16,float16,0,0.66004638671875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,32,4,128,1,fp8,fp8,0,0.17491840124130248
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,32,1,128,1,float16,fp8,0,0.17468639612197875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,32,1,128,1,fp8,fp8,0,0.1751919984817505
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,32,8,128,1,fp8,fp8,0,0.17579360008239747
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,32,2,128,1,float16,float16,0,0.21914880275726317
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,32,4,128,1,float16,float16,0,0.25034399032592775
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,32,4,128,1,float16,fp8,0,0.17513279914855956
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,32,8,128,1,float16,float16,0,0.30881600379943847
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,32,8,128,1,float16,fp8,0,0.17585439682006837
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,32,32,128,1,float16,fp8,0,0.11949280500411988
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,32,32,128,1,float16,float16,0,0.3477360010147095
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,32,32,128,1,fp8,fp8,0,0.11969920396804809
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,32,1,128,1,float16,float16,0,0.12223199605941773
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,32,4,128,1,float16,float16,0,0.14393919706344604
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,32,1,128,1,float16,fp8,0,0.09805279970169067
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,32,1,128,1,fp8,fp8,0,0.09837599992752075
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,32,2,128,1,float16,float16,0,0.1293951988220215
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,32,2,128,1,float16,fp8,0,0.09814559817314147
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,32,2,128,1,fp8,fp8,0,0.0978879988193512
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,32,4,128,1,float16,fp8,0,0.09817919731140137
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,32,4,128,1,fp8,fp8,0,0.09749280214309693
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,32,8,128,1,float16,float16,0,0.17214399576187134
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,32,8,128,1,float16,fp8,0,0.09895359873771667
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,32,8,128,1,fp8,fp8,0,0.09916639924049378
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,32,32,128,1,float16,float16,0,0.18995360136032105
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,32,32,128,1,float16,fp8,0,0.06945279836654664
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,32,1,128,1,float16,fp8,0,0.05975840091705322
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,32,32,128,1,fp8,fp8,0,0.06895679831504822
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,32,1,128,1,float16,float16,0,0.07263680100440979
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,32,1,128,1,fp8,fp8,0,0.05934240221977234
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,32,2,128,1,float16,float16,0,0.07624319791793824
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,32,2,128,1,float16,fp8,0,0.05933600068092346
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,32,2,128,1,fp8,fp8,0,0.05915039777755737
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,32,4,128,1,float16,float16,0,0.0807200014591217
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,32,32,128,1,float16,float16,0,0.09527999758720399
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,32,4,128,1,float16,fp8,0,0.05963680148124695
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,32,4,128,1,fp8,fp8,0,0.05919520258903503
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,32,8,128,1,float16,float16,0,0.0911952018737793
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,32,8,128,1,float16,fp8,0,0.059038400650024414
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,32,8,128,1,fp8,fp8,0,0.05993760228157043
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,32,32,128,1,float16,fp8,0,0.04321280121803284
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,32,32,128,1,fp8,fp8,0,0.04349600076675415
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,32,1,128,1,float16,float16,0,0.05177599787712097
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,32,1,128,1,float16,fp8,0,0.039238399267196654
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,32,1,128,1,fp8,fp8,0,0.03916800022125244
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,32,2,128,1,float16,float16,0,0.051976001262664794
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,32,2,128,1,float16,fp8,0,0.03913759887218475
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,32,2,128,1,fp8,fp8,0,0.03927040100097656
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,32,4,128,1,float16,float16,0,0.05553920269012451
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,32,4,128,1,float16,fp8,0,0.03907679915428162
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,32,4,128,1,fp8,fp8,0,0.03920960128307342
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,32,8,128,1,float16,float16,0,0.06141600012779236
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,32,8,128,1,float16,fp8,0,0.03917120099067688
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,32,8,128,1,fp8,fp8,0,0.039182400703430174
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,32,1,128,1,float16,float16,0,1.5232000350952148
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,32,1,128,1,float16,fp8,0,1.3319855690002442
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,32,1,128,1,fp8,fp8,0,1.3325152397155762
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,32,2,128,1,float16,fp8,0,1.3338735580444336
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,32,2,128,1,fp8,fp8,0,1.331276798248291
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,32,2,128,1,float16,float16,0,1.6857791900634767
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,32,4,128,1,float16,float16,0,2.0153200149536135
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,32,4,128,1,fp8,fp8,0,1.3303024291992187
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,32,4,128,1,float16,fp8,0,1.4666336059570313
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,32,8,128,1,float16,fp8,0,1.3323087692260742
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,32,8,128,1,fp8,fp8,0,1.3307807922363282
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,32,32,128,1,float16,fp8,0,0.9081808090209961
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,32,1,128,1,float16,float16,0,0.7823631763458252
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,32,32,128,1,fp8,fp8,0,0.8850447654724121
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,32,1,128,1,float16,fp8,0,0.673854398727417
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,32,8,128,1,float16,float16,0,2.6520511627197267
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,32,1,128,1,fp8,fp8,0,0.6732160091400147
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,32,2,128,1,float16,float16,0,0.8536640167236328
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,32,2,128,1,fp8,fp8,0,0.6739456176757812
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,32,2,128,1,float16,fp8,0,0.725436782836914
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,32,4,128,1,float16,fp8,0,0.6740943908691406
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,32,32,128,1,float16,float16,0,3.2235279083251953
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,32,4,128,1,float16,float16,0,1.014025592803955
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,32,4,128,1,fp8,fp8,0,0.6742847919464111
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,32,8,128,1,float16,fp8,0,0.6758927822113037
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,32,8,128,1,fp8,fp8,0,0.6746352195739747
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,32,32,128,1,float16,fp8,0,0.4506944179534912
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,32,8,128,1,float16,float16,0,1.3339903831481934
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,32,32,128,1,fp8,fp8,0,0.4510496139526367
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,32,1,128,1,float16,float16,0,0.4001455783843994
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,32,1,128,1,float16,fp8,0,0.3455024003982544
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,32,1,128,1,fp8,fp8,0,0.3456959962844849
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,32,2,128,1,float16,fp8,0,0.3445791959762573
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,32,2,128,1,float16,float16,0,0.43573598861694335
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,32,2,128,1,fp8,fp8,0,0.34509921073913574
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,32,4,128,1,float16,fp8,0,0.3471951961517334
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,32,4,128,1,fp8,fp8,0,0.34621920585632326
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,32,4,128,1,float16,float16,0,0.5145967960357666
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,32,32,128,1,float16,float16,0,1.6411472320556642
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,32,8,128,1,float16,fp8,0,0.34627680778503417
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,32,8,128,1,fp8,fp8,0,0.34696800708770753
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,32,8,128,1,float16,float16,0,0.6791408061981201
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,32,32,128,1,float16,fp8,0,0.2366719961166382
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,32,32,128,1,float16,float16,0,0.8191488265991211
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,32,32,128,1,fp8,fp8,0,0.2349776029586792
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,32,1,128,1,float16,float16,0,0.21277599334716796
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,32,1,128,1,float16,fp8,0,0.18263520002365113
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,32,4,128,1,float16,float16,0,0.2713903903961182
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,32,1,128,1,fp8,fp8,0,0.18152960538864135
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,32,2,128,1,float16,float16,0,0.23320960998535156
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,32,2,128,1,float16,fp8,0,0.18127679824829102
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,32,8,128,1,float16,float16,0,0.35031039714813234
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,32,2,128,1,fp8,fp8,0,0.18349920511245726
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,32,4,128,1,float16,fp8,0,0.1822751998901367
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,32,32,128,1,fp8,fp8,0,0.12645920515060424
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,32,4,128,1,fp8,fp8,0,0.18194880485534667
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,32,8,128,1,float16,fp8,0,0.1827952027320862
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,32,8,128,1,fp8,fp8,0,0.18258399963378907
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,32,32,128,1,float16,fp8,0,0.12680480480194092
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,32,32,128,1,float16,float16,0,0.4240592002868652
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,32,1,128,1,float16,float16,0,0.1264863967895508
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,32,1,128,1,float16,fp8,0,0.09965440034866332
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,32,1,128,1,fp8,fp8,0,0.09913600087165833
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,32,2,128,1,float16,float16,0,0.1338863968849182
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,32,2,128,1,float16,fp8,0,0.0986240029335022
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,32,2,128,1,fp8,fp8,0,0.09970239996910095
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,32,4,128,1,float16,float16,0,0.1530751943588257
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,32,4,128,1,float16,fp8,0,0.10017119646072388
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,32,4,128,1,fp8,fp8,0,0.10054080486297608
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,32,8,128,1,float16,float16,0,0.19158560037612915
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,32,8,128,1,float16,fp8,0,0.1001855969429016
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,32,8,128,1,fp8,fp8,0,0.10105600357055664
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,32,32,128,1,float16,float16,0,0.2256704092025757
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,32,32,128,1,float16,fp8,0,0.07205439805984497
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,32,32,128,1,fp8,fp8,0,0.07274720072746277
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,32,1,128,1,float16,float16,0,0.07121760249137879
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,32,1,128,1,float16,fp8,0,0.057687997817993164
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,32,1,128,1,fp8,fp8,0,0.05780320167541504
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,32,2,128,1,float16,float16,0,0.07618399858474731
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,32,2,128,1,float16,fp8,0,0.05787680149078369
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,32,2,128,1,fp8,fp8,0,0.05780640244483948
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,32,4,128,1,float16,float16,0,0.08352320194244385
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,32,4,128,1,float16,fp8,0,0.05800960063934326
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,32,4,128,1,fp8,fp8,0,0.058019202947616574
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,32,8,128,1,float16,float16,0,0.10947680473327637
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,32,8,128,1,float16,fp8,0,0.05820959806442261
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,32,8,128,1,fp8,fp8,0,0.05763999819755554
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,32,32,128,1,float16,float16,0,0.12131520509719848
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,32,32,128,1,float16,fp8,0,0.043278399109840396
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,32,32,128,1,fp8,fp8,0,0.04351359903812409
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,32,1,128,1,float16,float16,0,0.04940800070762634
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,32,1,128,1,float16,fp8,0,0.03715679943561554
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,32,1,128,1,fp8,fp8,0,0.03707520067691803
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,32,2,128,1,float16,float16,0,0.04964320063591003
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,32,2,128,1,float16,fp8,0,0.03701919913291931
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,32,2,128,1,fp8,fp8,0,0.037062400579452516
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,32,4,128,1,float16,float16,0,0.054308801889419556
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,32,32,128,1,float16,fp8,0,0.026876801252365114
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,32,4,128,1,float16,fp8,0,0.037136000394821164
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,32,4,128,1,fp8,fp8,0,0.03722560107707977
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,32,8,128,1,float16,float16,0,0.060249602794647215
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,32,8,128,1,float16,fp8,0,0.037143999338150026
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,32,8,128,1,fp8,fp8,0,0.03713119924068451
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,32,32,128,1,float16,float16,0,0.059617602825164796
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,32,32,128,1,fp8,fp8,0,0.026915198564529418
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,32,1,128,1,float16,float16,0,0.03498240113258362
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,32,1,128,1,float16,fp8,0,0.023099200427532197
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,32,1,128,1,fp8,fp8,0,0.023073600232601167
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,32,2,128,1,float16,float16,0,0.03515200018882751
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,32,2,128,1,float16,fp8,0,0.02300959974527359
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,32,2,128,1,fp8,fp8,0,0.022859199345111846
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,32,4,128,1,float16,float16,0,0.03505440056324005
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,32,4,128,1,float16,fp8,0,0.022916799783706664
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,32,4,128,1,fp8,fp8,0,0.02348320037126541
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,32,8,128,1,float16,float16,0,0.039780798554420474
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,32,8,128,1,float16,fp8,0,0.024753600358963013
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,32,8,128,1,fp8,fp8,0,0.023158399760723113
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,32,1,128,1,float16,float16,0,1.139801597595215
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,32,1,128,1,float16,fp8,0,1.0055855751037597
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,32,1,128,1,fp8,fp8,0,1.0049903869628907
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,32,2,128,1,float16,fp8,0,1.0057503700256347
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,32,2,128,1,float16,float16,0,1.3135616302490234
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,32,2,128,1,fp8,fp8,0,1.0074527740478516
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,32,4,128,1,float16,fp8,0,1.004641628265381
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,32,4,128,1,float16,float16,0,1.629217529296875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,32,4,128,1,fp8,fp8,0,1.0023008346557618
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,32,8,128,1,float16,fp8,0,1.005504035949707
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,32,8,128,1,fp8,fp8,0,1.004753589630127
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,32,32,128,1,float16,fp8,0,0.7165472030639648
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,32,1,128,1,float16,float16,0,0.5839136123657227
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,32,1,128,1,float16,fp8,0,0.5099055767059326
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,32,32,128,1,fp8,fp8,0,0.7146143913269043
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,32,1,128,1,fp8,fp8,0,0.5095232009887696
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,32,8,128,1,float16,float16,0,2.2689888000488283
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,32,2,128,1,float16,float16,0,0.6630943775177002
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,32,2,128,1,float16,fp8,0,0.5083903789520263
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,32,2,128,1,fp8,fp8,0,0.5098464012145996
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,32,4,128,1,float16,fp8,0,0.5092192173004151
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,32,4,128,1,fp8,fp8,0,0.5089056015014648
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,32,4,128,1,float16,float16,0,0.8290639877319336
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,32,32,128,1,float16,float16,0,3.004497528076172
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,32,8,128,1,float16,fp8,0,0.5083087921142578
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,32,8,128,1,fp8,fp8,0,0.5083600044250488
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,32,32,128,1,float16,fp8,0,0.364355206489563
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,32,8,128,1,float16,float16,0,1.1466815948486329
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,32,32,128,1,fp8,fp8,0,0.36420478820800783
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,32,1,128,1,float16,float16,0,0.3081376075744629
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,32,1,128,1,float16,fp8,0,0.26336159706115725
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,32,1,128,1,fp8,fp8,0,0.2609616041183472
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,32,2,128,1,float16,float16,0,0.3452768087387085
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,32,2,128,1,float16,fp8,0,0.2621056079864502
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,32,32,128,1,float16,float16,0,1.5146047592163085
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,32,2,128,1,fp8,fp8,0,0.2614847898483276
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,32,4,128,1,float16,float16,0,0.42800321578979494
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,32,4,128,1,float16,fp8,0,0.26200640201568604
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,32,4,128,1,fp8,fp8,0,0.26282880306243894
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,32,8,128,1,float16,fp8,0,0.2610208034515381
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,32,8,128,1,float16,float16,0,0.5839968204498291
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,32,8,128,1,fp8,fp8,0,0.262281608581543
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,32,32,128,1,float16,fp8,0,0.18936480283737184
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,32,32,128,1,fp8,fp8,0,0.1895583987236023
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,32,2,128,1,float16,fp8,0,0.13805919885635376
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,32,1,128,1,float16,float16,0,0.1688495993614197
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,32,1,128,1,float16,fp8,0,0.13929599523544312
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,32,32,128,1,float16,float16,0,0.7701663970947266
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,32,4,128,1,fp8,fp8,0,0.13888479471206666
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,32,1,128,1,fp8,fp8,0,0.13801759481430054
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,32,2,128,1,float16,float16,0,0.1880336046218872
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,32,8,128,1,fp8,fp8,0,0.1393072009086609
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,32,2,128,1,fp8,fp8,0,0.1376271963119507
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,32,4,128,1,float16,fp8,0,0.13887039422988892
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,32,4,128,1,float16,float16,0,0.22864480018615724
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,32,8,128,1,float16,fp8,0,0.13906079530715942
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,32,8,128,1,float16,float16,0,0.3074048042297363
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,32,32,128,1,float16,fp8,0,0.10279999971389771
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,32,32,128,1,fp8,fp8,0,0.1032863974571228
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,32,32,128,1,float16,float16,0,0.39659039974212645
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,32,1,128,1,float16,float16,0,0.10122720003128052
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,32,1,128,1,float16,fp8,0,0.07585920095443725
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,32,1,128,1,fp8,fp8,0,0.0755024015903473
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,32,2,128,1,float16,float16,0,0.10862079858779908
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,32,2,128,1,float16,fp8,0,0.07682880163192748
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,32,2,128,1,fp8,fp8,0,0.07539680004119872
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,32,4,128,1,float16,float16,0,0.12831519842147826
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,32,4,128,1,float16,fp8,0,0.07663360238075256
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,32,4,128,1,fp8,fp8,0,0.0760479986667633
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,32,8,128,1,float16,float16,0,0.16660640239715577
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,32,8,128,1,float16,fp8,0,0.07680799961090087
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,32,8,128,1,fp8,fp8,0,0.07671200037002564
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,32,32,128,1,float16,float16,0,0.21216158866882323
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,32,32,128,1,float16,fp8,0,0.05799040198326111
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,32,32,128,1,fp8,fp8,0,0.05804479718208313
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,32,1,128,1,float16,float16,0,0.05623360276222229
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,32,4,128,1,float16,float16,0,0.06963199973106385
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,32,1,128,1,float16,fp8,0,0.043819200992584226
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,32,1,128,1,fp8,fp8,0,0.04336639940738678
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,32,2,128,1,float16,float16,0,0.06228320002555847
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,32,2,128,1,float16,fp8,0,0.04391199946403503
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,32,2,128,1,fp8,fp8,0,0.04421440064907074
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,32,4,128,1,float16,fp8,0,0.04366239905357361
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,32,4,128,1,fp8,fp8,0,0.04368480145931244
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,32,8,128,1,float16,float16,0,0.09209920167922973
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,32,8,128,1,float16,fp8,0,0.043587198853492735
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,32,8,128,1,fp8,fp8,0,0.043870401382446286
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,32,32,128,1,float16,float16,0,0.11277279853820801
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,32,2,128,1,float16,float16,0,0.040596801042556765
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,32,32,128,1,float16,fp8,0,0.03467519879341126
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,32,32,128,1,fp8,fp8,0,0.03418880105018616
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,32,1,128,1,float16,float16,0,0.03912320137023926
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,32,1,128,1,float16,fp8,0,0.027075201272964478
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,32,1,128,1,fp8,fp8,0,0.027768000960350037
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,32,2,128,1,float16,fp8,0,0.028417599201202393
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,32,2,128,1,fp8,fp8,0,0.02696479856967926
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,32,4,128,1,float16,float16,0,0.04530400037765503
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,32,4,128,1,float16,fp8,0,0.027636799216270446
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,32,4,128,1,fp8,fp8,0,0.028171199560165405
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,32,8,128,1,float16,float16,0,0.05144960284233093
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,32,8,128,1,float16,fp8,0,0.028696000576019287
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,32,8,128,1,fp8,fp8,0,0.028611201047897338
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,32,32,128,1,float16,float16,0,0.053553599119186404
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,32,32,128,1,float16,fp8,0,0.02081120014190674
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,32,32,128,1,fp8,fp8,0,0.020878399908542632
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,32,1,128,1,float16,float16,0,0.028907200694084166
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,32,1,128,1,float16,fp8,0,0.01855839937925339
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,32,1,128,1,fp8,fp8,0,0.018598400056362152
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,32,2,128,1,float16,float16,0,0.02889440059661865
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,32,2,128,1,float16,fp8,0,0.018267199397087097
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,32,2,128,1,fp8,fp8,0,0.01849440038204193
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,32,4,128,1,float16,float16,0,0.029043200612068176
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,32,4,128,1,float16,fp8,0,0.018580800294876097
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,32,4,128,1,fp8,fp8,0,0.018580800294876097
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,32,8,128,1,float16,float16,0,0.03420319855213165
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,32,8,128,1,float16,fp8,0,0.018606400489807128
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,32,8,128,1,fp8,fp8,0,0.01865279972553253
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,32,32,128,1,float16,float16,0,0.03859840035438537
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,32,32,128,1,float16,fp8,0,0.018593600392341612
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,32,32,128,1,fp8,fp8,0,0.018596799671649934
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,32,1,128,1,float16,float16,0,0.02766079902648926
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,32,1,128,1,float16,fp8,0,0.016628800332546233
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,32,1,128,1,fp8,fp8,0,0.016545599699020384
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,32,2,128,1,float16,float16,0,0.02677919864654541
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,32,2,128,1,float16,fp8,0,0.016579200327396394
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,32,2,128,1,fp8,fp8,0,0.016657599806785585
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,32,4,128,1,float16,float16,0,0.0274399995803833
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,32,4,128,1,float16,fp8,0,0.016539199650287627
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,32,4,128,1,fp8,fp8,0,0.016601599752902985
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,32,8,128,1,float16,float16,0,0.028804799914360045
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,32,8,128,1,float16,fp8,0,0.016539199650287627
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,32,8,128,1,fp8,fp8,0,0.0166143998503685
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,32,1,128,1,float16,float16,0,0.47611680030822756
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,32,1,128,1,float16,fp8,0,0.4120816230773926
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,32,1,128,1,fp8,fp8,0,0.4134208202362061
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,32,2,128,1,float16,float16,0,0.5556479930877686
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,32,2,128,1,float16,fp8,0,0.4129968166351318
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,32,2,128,1,fp8,fp8,0,0.41277599334716797
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,32,4,128,1,float16,fp8,0,0.4124879837036133
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,32,4,128,1,float16,float16,0,0.714414405822754
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,32,4,128,1,fp8,fp8,0,0.41271200180053713
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,32,8,128,1,float16,fp8,0,0.41333918571472167
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,32,8,128,1,fp8,fp8,0,0.4124112129211426
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,32,32,128,1,float16,fp8,0,0.3141488075256348
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,32,8,128,1,float16,float16,0,1.0326959609985351
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,32,32,128,1,fp8,fp8,0,0.3134943962097168
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,32,1,128,1,float16,float16,0,0.25177760124206544
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,32,1,128,1,float16,fp8,0,0.21158881187438966
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,32,1,128,1,fp8,fp8,0,0.21163039207458495
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,32,2,128,1,float16,float16,0,0.29179840087890624
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,32,2,128,1,float16,fp8,0,0.21142079830169677
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,32,2,128,1,fp8,fp8,0,0.21171200275421143
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,32,32,128,1,float16,float16,0,1.454593563079834
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,32,4,128,1,float16,float16,0,0.3693264007568359
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,32,4,128,1,float16,fp8,0,0.2117408037185669
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,32,4,128,1,fp8,fp8,0,0.2113231897354126
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,32,8,128,1,float16,fp8,0,0.21274878978729247
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,32,8,128,1,fp8,fp8,0,0.21149439811706544
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,32,8,128,1,float16,float16,0,0.5264272212982177
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,32,32,128,1,float16,fp8,0,0.16292639970779418
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,32,32,128,1,fp8,fp8,0,0.16202399730682374
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,32,1,128,1,float16,float16,0,0.13926559686660767
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,32,32,128,1,float16,float16,0,0.7379136085510254
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,32,1,128,1,float16,fp8,0,0.11087199449539184
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,32,1,128,1,fp8,fp8,0,0.11166239976882934
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,32,2,128,1,float16,float16,0,0.1584239959716797
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,32,2,128,1,float16,fp8,0,0.11132800579071045
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,32,2,128,1,fp8,fp8,0,0.11116640567779541
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,32,4,128,1,float16,float16,0,0.19703840017318724
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,32,4,128,1,float16,fp8,0,0.11111040115356445
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,32,32,128,1,float16,fp8,0,0.08654080033302307
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,32,4,128,1,fp8,fp8,0,0.11118240356445312
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,32,8,128,1,float16,fp8,0,0.11229759454727173
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,32,8,128,1,float16,float16,0,0.2729327917098999
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,32,8,128,1,fp8,fp8,0,0.1111024022102356
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,32,32,128,1,fp8,fp8,0,0.08624799847602845
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,32,32,128,1,float16,float16,0,0.3808144092559814
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,32,1,128,1,float16,float16,0,0.08267359733581543
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,32,1,128,1,float16,fp8,0,0.05964159965515137
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,32,1,128,1,fp8,fp8,0,0.05958560109138489
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,32,2,128,1,float16,float16,0,0.09158400297164918
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,32,2,128,1,float16,fp8,0,0.059595197439193726
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,32,2,128,1,fp8,fp8,0,0.059708797931671144
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,32,4,128,1,float16,float16,0,0.11077439785003662
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,32,4,128,1,float16,fp8,0,0.060057598352432254
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,32,4,128,1,fp8,fp8,0,0.06055840253829956
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,32,8,128,1,float16,float16,0,0.14836959838867186
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,32,8,128,1,float16,fp8,0,0.060785597562789916
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,32,8,128,1,fp8,fp8,0,0.061684799194335935
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,32,32,128,1,float16,float16,0,0.20421760082244872
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,32,32,128,1,float16,fp8,0,0.05090559720993042
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,32,32,128,1,fp8,fp8,0,0.0508575975894928
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,32,1,128,1,float16,float16,0,0.047670400142669676
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,32,1,128,1,float16,fp8,0,0.036190399527549745
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,32,1,128,1,fp8,fp8,0,0.03632160127162933
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,32,2,128,1,float16,float16,0,0.05360640287399292
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,32,2,128,1,float16,fp8,0,0.036190399527549745
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,32,2,128,1,fp8,fp8,0,0.03691039979457855
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,32,4,128,1,float16,float16,0,0.06000959873199463
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,32,4,128,1,float16,fp8,0,0.03531199991703034
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,32,4,128,1,fp8,fp8,0,0.035764798521995544
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,32,8,128,1,float16,float16,0,0.0838591992855072
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,32,8,128,1,float16,fp8,0,0.03702560067176819
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,32,8,128,1,fp8,fp8,0,0.0364767998456955
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,32,32,128,1,float16,float16,0,0.1067088007926941
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,32,32,128,1,float16,fp8,0,0.028865599632263185
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,32,32,128,1,fp8,fp8,0,0.02842560112476349
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,32,1,128,1,float16,float16,0,0.03317919969558716
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,32,1,128,1,float16,fp8,0,0.02253919988870621
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,32,1,128,1,fp8,fp8,0,0.02271360009908676
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,32,2,128,1,float16,float16,0,0.03298879861831665
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,32,2,128,1,float16,fp8,0,0.022700800001621245
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,32,2,128,1,fp8,fp8,0,0.022724799811840057
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,32,4,128,1,float16,float16,0,0.03733760118484497
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,32,4,128,1,float16,fp8,0,0.022672000527381896
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,32,32,128,1,float16,fp8,0,0.018612800538539885
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,32,4,128,1,fp8,fp8,0,0.022752000391483305
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,32,1,128,1,float16,float16,0,0.024831999838352204
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,32,8,128,1,float16,float16,0,0.04338720142841339
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,32,1,128,1,float16,fp8,0,0.01467359960079193
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,32,8,128,1,float16,fp8,0,0.022064000368118286
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,32,8,128,1,fp8,fp8,0,0.02269120067358017
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,32,32,128,1,float16,float16,0,0.051819199323654176
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,32,32,128,1,fp8,fp8,0,0.018593600392341612
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,32,1,128,1,fp8,fp8,0,0.014499199390411378
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,32,2,128,1,float16,float16,0,0.02497600018978119
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,32,2,128,1,float16,fp8,0,0.014697599411010741
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,32,2,128,1,fp8,fp8,0,0.014588800072669984
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,32,4,128,1,float16,float16,0,0.025041601061820982
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,32,32,128,1,float16,float16,0,0.03521760106086731
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,32,4,128,1,float16,fp8,0,0.014582400023937226
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,32,4,128,1,fp8,fp8,0,0.014494399726390838
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,32,8,128,1,float16,float16,0,0.031060799956321716
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,32,1,128,1,fp8,fp8,0,0.012644800543785095
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,32,8,128,1,float16,fp8,0,0.014542399346828461
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,32,2,128,1,float16,fp8,0,0.012694400548934937
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,32,8,128,1,fp8,fp8,0,0.014532800018787383
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,32,32,128,1,float16,fp8,0,0.014577600359916686
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,32,32,128,1,fp8,fp8,0,0.014684799313545226
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,32,1,128,1,float16,float16,0,0.02480800002813339
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,32,1,128,1,float16,fp8,0,0.014483200013637542
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,32,2,128,1,float16,float16,0,0.025016000866889952
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,32,2,128,1,fp8,fp8,0,0.014460800588130951
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,32,4,128,1,float16,float16,0,0.02476480007171631
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,32,4,128,1,float16,fp8,0,0.012590399384498597
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,32,4,128,1,fp8,fp8,0,0.01446399986743927
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,32,8,128,1,float16,float16,0,0.024747200310230255
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,32,8,128,1,float16,fp8,0,0.012608000636100769
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,32,8,128,1,fp8,fp8,0,0.01449120044708252
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,32,32,128,1,float16,float16,0,0.02683520019054413
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,32,32,128,1,float16,fp8,0,0.014595200121402741
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,32,32,128,1,fp8,fp8,0,0.01451839953660965
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,32,1,128,1,float16,float16,0,0.022700800001621245
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,32,1,128,1,float16,fp8,0,0.012625600397586822
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,32,1,128,1,fp8,fp8,0,0.012622399628162384
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,32,2,128,1,float16,float16,0,0.022728000581264497
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,32,2,128,1,float16,fp8,0,0.012721599638462066
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,32,2,128,1,fp8,fp8,0,0.012622399628162384
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,32,4,128,1,float16,float16,0,0.022756800055503845
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,32,4,128,1,float16,fp8,0,0.01257600039243698
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,32,4,128,1,fp8,fp8,0,0.012608000636100769
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,32,8,128,1,float16,float16,0,0.022808000445365906
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,32,8,128,1,float16,fp8,0,0.012654399871826172
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,32,8,128,1,fp8,fp8,0,0.01265760064125061
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,32,1,128,1,float16,float16,0,0.29691998958587645
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,32,1,128,1,float16,fp8,0,0.25523359775543214
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,32,1,128,1,fp8,fp8,0,0.25516159534454347
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,32,4,128,1,float16,fp8,0,0.2546080112457275
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,32,2,128,1,float16,float16,0,0.3344752073287964
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,32,2,128,1,float16,fp8,0,0.2554960012435913
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,32,2,128,1,fp8,fp8,0,0.2548176050186157
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,32,4,128,1,float16,float16,0,0.4131487846374512
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,32,4,128,1,fp8,fp8,0,0.2547424077987671
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,32,8,128,1,float16,fp8,0,0.25474560260772705
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,32,8,128,1,fp8,fp8,0,0.2550528049468994
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,32,8,128,1,float16,float16,0,0.5706799983978271
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,32,32,128,1,float16,fp8,0,0.18397120237350464
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,32,32,128,1,fp8,fp8,0,0.18359999656677245
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,32,1,128,1,float16,float16,0,0.16138240098953247
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,32,32,128,1,float16,float16,0,0.7570127964019775
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,32,1,128,1,float16,fp8,0,0.13241599798202514
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,32,1,128,1,fp8,fp8,0,0.13309600353240966
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,32,2,128,1,float16,float16,0,0.18105119466781616
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,32,2,128,1,float16,fp8,0,0.13303359746932983
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,32,2,128,1,fp8,fp8,0,0.13324480056762694
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,32,8,128,1,float16,fp8,0,0.13292479515075684
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,32,4,128,1,float16,float16,0,0.21920158863067626
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,32,4,128,1,float16,fp8,0,0.13196640014648436
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,32,4,128,1,fp8,fp8,0,0.13295199871063232
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,32,8,128,1,float16,float16,0,0.2958336114883423
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,32,8,128,1,fp8,fp8,0,0.133025598526001
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,32,32,128,1,float16,fp8,0,0.09668639898300171
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,32,32,128,1,float16,float16,0,0.3901024103164673
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,32,32,128,1,fp8,fp8,0,0.09660000205039979
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,32,1,128,1,float16,float16,0,0.09546080231666565
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,32,1,128,1,float16,fp8,0,0.07068480253219604
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,32,1,128,1,fp8,fp8,0,0.07070559859275818
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,32,2,128,1,float16,float16,0,0.1032863974571228
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,32,2,128,1,float16,fp8,0,0.0708847999572754
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,32,8,128,1,float16,float16,0,0.1605728030204773
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,32,2,128,1,fp8,fp8,0,0.0712336003780365
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,32,4,128,1,float16,float16,0,0.12338880300521851
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,32,4,128,1,float16,fp8,0,0.0709712028503418
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,32,4,128,1,fp8,fp8,0,0.07104960083961487
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,32,8,128,1,float16,fp8,0,0.07129600048065185
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,32,8,128,1,fp8,fp8,0,0.07175359725952149
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,32,32,128,1,float16,float16,0,0.20522239208221435
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,32,32,128,1,float16,fp8,0,0.053521597385406496
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,32,32,128,1,fp8,fp8,0,0.05347040295600891
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,32,1,128,1,float16,float16,0,0.05161600112915039
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,32,1,128,1,float16,fp8,0,0.039164799451828006
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,32,4,128,1,float16,fp8,0,0.0391728013753891
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,32,1,128,1,fp8,fp8,0,0.039113599061965945
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,32,2,128,1,float16,float16,0,0.05945600271224975
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,32,2,128,1,float16,fp8,0,0.03911199867725372
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,32,2,128,1,fp8,fp8,0,0.03919839859008789
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,32,4,128,1,float16,float16,0,0.06599360108375549
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,32,4,128,1,fp8,fp8,0,0.03911519944667816
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,32,8,128,1,float16,float16,0,0.08985440135002136
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,32,8,128,1,float16,fp8,0,0.03918879926204681
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,32,8,128,1,fp8,fp8,0,0.03918719887733459
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,32,32,128,1,float16,float16,0,0.10998400449752807
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,32,32,128,1,float16,fp8,0,0.03089759945869446
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,32,32,128,1,fp8,fp8,0,0.030859199166297913
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,32,1,128,1,float16,float16,0,0.03672640025615692
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,32,1,128,1,float16,fp8,0,0.024798400700092316
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,32,1,128,1,fp8,fp8,0,0.024851199984550477
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,32,2,128,1,float16,float16,0,0.03699679970741272
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,32,8,128,1,float16,fp8,0,0.02475520074367523
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,32,2,128,1,float16,fp8,0,0.024780799448490144
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,32,32,128,1,float16,float16,0,0.053039997816085815
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,32,2,128,1,fp8,fp8,0,0.024849599599838255
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,32,4,128,1,float16,float16,0,0.0411871999502182
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,32,4,128,1,float16,fp8,0,0.024883200228214265
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,32,4,128,1,fp8,fp8,0,0.024769599735736846
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,32,8,128,1,float16,float16,0,0.04731520116329193
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,32,8,128,1,fp8,fp8,0,0.02489120066165924
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,32,32,128,1,float16,fp8,0,0.018691200017929076
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,32,32,128,1,fp8,fp8,0,0.018671999871730804
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,32,1,128,1,float16,float16,0,0.02683840095996857
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,32,1,128,1,float16,fp8,0,0.0165120005607605
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,32,1,128,1,fp8,fp8,0,0.016574400663375854
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,32,2,128,1,float16,float16,0,0.026815998554229736
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,32,2,128,1,float16,fp8,0,0.016463999450206757
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,32,8,128,1,fp8,fp8,0,0.01656319946050644
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,32,2,128,1,fp8,fp8,0,0.016208000481128693
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,32,4,128,1,float16,float16,0,0.02746239900588989
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,32,4,128,1,float16,fp8,0,0.016542400419712066
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,32,4,128,1,fp8,fp8,0,0.01656319946050644
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,32,8,128,1,float16,float16,0,0.03296639919281006
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,32,8,128,1,float16,fp8,0,0.0166703999042511
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,32,32,128,1,float16,float16,0,0.033913600444793704
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,32,32,128,1,float16,fp8,0,0.012470400333404541
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,32,32,128,1,fp8,fp8,0,0.012638400495052337
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,32,1,128,1,float16,float16,0,0.022679999470710754
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,32,1,128,1,float16,fp8,0,0.010471999645233154
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,32,1,128,1,fp8,fp8,0,0.010710400342941285
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,32,2,128,1,float16,float16,0,0.02268960028886795
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,32,2,128,1,float16,fp8,0,0.01141119971871376
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,32,2,128,1,fp8,fp8,0,0.011587200313806533
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,32,4,128,1,float16,float16,0,0.022697600722312927
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,32,4,128,1,float16,fp8,0,0.012212800234556198
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,32,4,128,1,fp8,fp8,0,0.010540799796581268
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,32,8,128,1,float16,float16,0,0.022888000309467315
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,32,8,128,1,float16,fp8,0,0.010486400127410889
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,32,8,128,1,fp8,fp8,0,0.01064319983124733
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,32,32,128,1,float16,float16,0,0.025355198979377748
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,32,32,128,1,float16,fp8,0,0.010710400342941285
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,32,2,128,1,fp8,fp8,0,0.010467199981212616
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,32,32,128,1,fp8,fp8,0,0.010627199709415436
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,32,1,128,1,float16,float16,0,0.020857599377632142
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,32,1,128,1,float16,fp8,0,0.010462400317192078
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,32,1,128,1,fp8,fp8,0,0.010440000146627427
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,32,2,128,1,float16,float16,0,0.01863519996404648
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,32,2,128,1,float16,fp8,0,0.010388799756765366
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,32,4,128,1,float16,float16,0,0.02078240066766739
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,32,4,128,1,float16,fp8,0,0.010393600165843963
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,32,4,128,1,fp8,fp8,0,0.010494399815797806
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,32,8,128,1,float16,float16,0,0.020750400424003602
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,32,8,128,1,float16,fp8,0,0.010558400303125381
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,32,8,128,1,fp8,fp8,0,0.010518400371074677
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,32,32,128,1,float16,float16,0,0.019288000464439393
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,32,32,128,1,float16,fp8,0,0.010489600151777268
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,32,32,128,1,fp8,fp8,0,0.010627199709415436
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,32,1,128,1,float16,float16,0,0.020766399800777435
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,32,1,128,1,float16,fp8,0,0.010414399951696397
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,32,1,128,1,fp8,fp8,0,0.010425599664449692
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,32,2,128,1,float16,float16,0,0.018750399351119995
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,32,2,128,1,float16,fp8,0,0.010446400195360184
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,32,2,128,1,fp8,fp8,0,0.010391999781131745
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,32,4,128,1,float16,float16,0,0.018644799292087556
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,32,4,128,1,float16,fp8,0,0.010422399640083313
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,32,4,128,1,fp8,fp8,0,0.010521599650382995
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,32,8,128,1,float16,float16,0,0.018900799751281738
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,32,8,128,1,float16,fp8,0,0.010470400005578995
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,32,8,128,1,fp8,fp8,0,0.010604800283908844
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,32,1,128,1,float16,float16,0,0.24511680603027344
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,32,1,128,1,float16,fp8,0,0.2054896116256714
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,32,1,128,1,fp8,fp8,0,0.20554239749908448
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,32,2,128,1,float16,float16,0,0.2659392118453979
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,32,2,128,1,float16,fp8,0,0.20582880973815917
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,32,2,128,1,fp8,fp8,0,0.20596160888671874
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,32,4,128,1,float16,float16,0,0.3034447908401489
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,32,4,128,1,float16,fp8,0,0.2054431915283203
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,32,4,128,1,fp8,fp8,0,0.20541439056396485
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,32,32,128,1,float16,fp8,0,0.13376319408416748
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,32,8,128,1,float16,float16,0,0.38000640869140623
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,32,8,128,1,float16,fp8,0,0.20534560680389405
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,32,8,128,1,fp8,fp8,0,0.20573599338531495
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,32,32,128,1,float16,float16,0,0.43291358947753905
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,32,32,128,1,fp8,fp8,0,0.1335360050201416
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,32,1,128,1,float16,fp8,0,0.10790560245513917
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,32,1,128,1,float16,float16,0,0.13825600147247313
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,32,1,128,1,fp8,fp8,0,0.10795680284500123
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,32,2,128,1,float16,float16,0,0.14478399753570556
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,32,2,128,1,float16,fp8,0,0.10787039995193481
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,32,2,128,1,fp8,fp8,0,0.10790239572525025
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,32,4,128,1,float16,float16,0,0.16465280055999756
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,32,4,128,1,float16,fp8,0,0.10750559568405152
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,32,4,128,1,fp8,fp8,0,0.1077232003211975
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,32,8,128,1,float16,float16,0,0.20398879051208496
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,32,8,128,1,float16,fp8,0,0.10838719606399536
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,32,8,128,1,fp8,fp8,0,0.1080288052558899
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,32,32,128,1,float16,float16,0,0.22673280239105226
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,32,32,128,1,float16,fp8,0,0.07232159972190857
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,32,32,128,1,fp8,fp8,0,0.07159839868545533
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,32,1,128,1,float16,float16,0,0.07567039728164673
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,32,1,128,1,float16,fp8,0,0.05767359733581543
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,32,1,128,1,fp8,fp8,0,0.058222401142120364
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,32,2,128,1,float16,float16,0,0.0798143982887268
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,32,8,128,1,float16,float16,0,0.11172480583190918
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,32,2,128,1,float16,fp8,0,0.058089601993560794
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,32,2,128,1,fp8,fp8,0,0.05798879861831665
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,32,4,128,1,float16,float16,0,0.09167680144309998
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,32,4,128,1,float16,fp8,0,0.057955199480056764
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,32,4,128,1,fp8,fp8,0,0.05773119926452637
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,32,8,128,1,float16,fp8,0,0.05812320113182068
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,32,8,128,1,fp8,fp8,0,0.0577023983001709
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,32,32,128,1,float16,float16,0,0.12164479494094849
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,32,32,128,1,float16,fp8,0,0.03920319974422455
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,32,32,128,1,fp8,fp8,0,0.03918560147285462
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,32,1,128,1,float16,float16,0,0.04662240147590637
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,32,1,128,1,float16,fp8,0,0.033036801218986514
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,32,1,128,1,fp8,fp8,0,0.033046400547027587
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,32,2,128,1,float16,float16,0,0.047356799244880676
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,32,2,128,1,float16,fp8,0,0.03298560082912445
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,32,2,128,1,fp8,fp8,0,0.03294560015201568
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,32,4,128,1,float16,float16,0,0.051692801713943484
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,32,4,128,1,float16,fp8,0,0.03306559920310974
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,32,32,128,1,fp8,fp8,0,0.024692800641059876
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,32,4,128,1,fp8,fp8,0,0.032995200157165526
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,32,8,128,1,float16,float16,0,0.05778080224990845
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,32,8,128,1,float16,fp8,0,0.03299039900302887
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,32,8,128,1,fp8,fp8,0,0.03327359855175018
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,32,32,128,1,float16,float16,0,0.058064001798629764
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,32,32,128,1,float16,fp8,0,0.024753600358963013
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,32,1,128,1,float16,float16,0,0.033024001121521
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,32,1,128,1,float16,fp8,0,0.020665599405765532
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,32,1,128,1,fp8,fp8,0,0.020747199654579163
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,32,2,128,1,float16,float16,0,0.03332160115242004
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,32,2,128,1,float16,fp8,0,0.0208064004778862
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,32,2,128,1,fp8,fp8,0,0.021147200465202333
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,32,4,128,1,float16,float16,0,0.034892800450325015
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,32,4,128,1,float16,fp8,0,0.02078240066766739
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,32,4,128,1,fp8,fp8,0,0.020735999941825865
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,32,8,128,1,float16,float16,0,0.039099198579788205
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,32,8,128,1,float16,fp8,0,0.02083040028810501
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,32,8,128,1,fp8,fp8,0,0.02094080001115799
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,32,32,128,1,float16,float16,0,0.037176001071929934
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,32,32,128,1,float16,fp8,0,0.016579200327396394
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,32,32,128,1,fp8,fp8,0,0.016582399606704712
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,32,1,128,1,float16,float16,0,0.026368001103401185
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,32,1,128,1,float16,fp8,0,0.014524799585342408
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,32,1,128,1,fp8,fp8,0,0.014504000544548035
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,32,2,128,1,float16,fp8,0,0.014595200121402741
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,32,2,128,1,float16,float16,0,0.026958400011062623
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,32,2,128,1,fp8,fp8,0,0.0144896000623703
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,32,4,128,1,float16,float16,0,0.025911998748779298
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,32,4,128,1,float16,fp8,0,0.014590400457382201
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,32,4,128,1,fp8,fp8,0,0.014486399292945863
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,32,8,128,1,float16,float16,0,0.026833599805831908
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,32,8,128,1,float16,fp8,0,0.014502400159835815
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,32,8,128,1,fp8,fp8,0,0.014616000652313232
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,32,32,128,1,float16,float16,0,0.026902401447296144
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,32,32,128,1,float16,fp8,0,0.011723200231790543
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,32,32,128,1,fp8,fp8,0,0.01053439974784851
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,32,1,128,1,float16,float16,0,0.020815999805927278
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,32,1,128,1,float16,fp8,0,0.010395199805498124
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,32,1,128,1,fp8,fp8,0,0.010416000336408614
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,32,2,128,1,float16,float16,0,0.020713600516319274
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,32,2,128,1,float16,fp8,0,0.010580799728631973
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,32,2,128,1,fp8,fp8,0,0.010427200049161912
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,32,4,128,1,float16,float16,0,0.020734399557113647
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,32,4,128,1,float16,fp8,0,0.010518400371074677
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,32,4,128,1,fp8,fp8,0,0.010500799864530563
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,32,32,128,1,float16,fp8,0,0.010398399829864503
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,32,8,128,1,float16,float16,0,0.020665599405765532
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,32,1,128,1,float16,float16,0,0.018564799427986146
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,32,8,128,1,float16,fp8,0,0.010473600029945374
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,32,8,128,1,fp8,fp8,0,0.010639999806880952
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,32,32,128,1,float16,float16,0,0.020627200603485107
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,32,32,128,1,fp8,fp8,0,0.010441599786281586
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,32,1,128,1,float16,fp8,0,0.010492800176143647
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,32,1,128,1,fp8,fp8,0,0.010395199805498124
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,32,2,128,1,float16,float16,0,0.01863519996404648
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,32,2,128,1,float16,fp8,0,0.010543999820947647
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,32,2,128,1,fp8,fp8,0,0.010428799688816071
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,32,4,128,1,float16,float16,0,0.018680000305175783
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,32,4,128,1,float16,fp8,0,0.010440000146627427
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,32,4,128,1,fp8,fp8,0,0.010409600287675857
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,32,8,128,1,float16,float16,0,0.01870879977941513
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,32,8,128,1,float16,fp8,0,0.010737600177526474
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,32,8,128,1,fp8,fp8,0,0.010416000336408614
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,32,32,128,1,float16,float16,0,0.018771199882030486
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,32,32,128,1,float16,fp8,0,0.010385599732398988
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,32,2,128,1,fp8,fp8,0,0.010265599936246872
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,32,32,128,1,fp8,fp8,0,0.010382399708032609
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,32,1,128,1,float16,float16,0,0.018811200559139252
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,32,1,128,1,float16,fp8,0,0.010446400195360184
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,32,1,128,1,fp8,fp8,0,0.010438399761915207
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,32,2,128,1,float16,float16,0,0.018588800728321076
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,32,2,128,1,float16,fp8,0,0.01037919968366623
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,32,4,128,1,float16,float16,0,0.01865759938955307
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,32,4,128,1,float16,fp8,0,0.010369600355625152
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,32,4,128,1,fp8,fp8,0,0.01040479987859726
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,32,8,128,1,float16,float16,0,0.018651199340820313
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,32,8,128,1,float16,fp8,0,0.010428799688816071
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,32,2,128,1,float16,fp8,0,0.18380160331726075
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,32,8,128,1,fp8,fp8,0,0.010409600287675857
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,32,1,128,1,float16,float16,0,0.2285248041152954
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,32,1,128,1,float16,fp8,0,0.18373119831085205
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,32,1,128,1,fp8,fp8,0,0.18381439447402953
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,32,2,128,1,float16,float16,0,0.23181600570678712
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,32,2,128,1,fp8,fp8,0,0.1836192011833191
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,32,4,128,1,float16,fp8,0,0.18380800485610962
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,32,4,128,1,float16,float16,0,0.2582047939300537
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,32,4,128,1,fp8,fp8,0,0.18397599458694458
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,32,8,128,1,float16,float16,0,0.29433279037475585
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,32,8,128,1,float16,fp8,0,0.18360320329666138
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,32,8,128,1,fp8,fp8,0,0.18381279706954956
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,32,32,128,1,float16,float16,0,0.2715264081954956
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,32,32,128,1,float16,fp8,0,0.10945600271224976
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,32,32,128,1,fp8,fp8,0,0.109169602394104
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,32,1,128,1,float16,float16,0,0.1211840033531189
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,32,1,128,1,float16,fp8,0,0.09536319971084595
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,32,1,128,1,fp8,fp8,0,0.09543840289115905
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,32,2,128,1,float16,float16,0,0.1252400040626526
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,32,2,128,1,float16,fp8,0,0.09530879855155945
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,32,2,128,1,fp8,fp8,0,0.09485440254211426
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,32,4,128,1,float16,float16,0,0.13535360097885132
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,32,4,128,1,float16,fp8,0,0.09529280066490173
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,32,4,128,1,fp8,fp8,0,0.09588479995727539
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,32,8,128,1,float16,float16,0,0.15875680446624757
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,32,8,128,1,float16,fp8,0,0.09527199864387512
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,32,8,128,1,fp8,fp8,0,0.09597920179367066
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,32,32,128,1,float16,float16,0,0.14570720195770265
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,32,32,128,1,float16,fp8,0,0.0583728015422821
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,32,32,128,1,fp8,fp8,0,0.05810400247573853
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,32,1,128,1,float16,float16,0,0.06965759992599488
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,32,1,128,1,float16,fp8,0,0.05273920297622681
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,32,1,128,1,fp8,fp8,0,0.052534401416778564
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,32,2,128,1,float16,float16,0,0.07109760046005249
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,32,2,128,1,float16,fp8,0,0.051795202493667605
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,32,2,128,1,fp8,fp8,0,0.05212000012397766
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,32,4,128,1,float16,float16,0,0.07423359751701356
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,32,4,128,1,float16,fp8,0,0.051876801252365115
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,32,4,128,1,fp8,fp8,0,0.05183359980583191
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,32,8,128,1,float16,float16,0,0.08167200088500977
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,32,8,128,1,float16,fp8,0,0.05293120145797729
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,32,8,128,1,fp8,fp8,0,0.052804797887802124
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,32,32,128,1,float16,float16,0,0.06991519927978515
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,32,32,128,1,float16,fp8,0,0.0331167995929718
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,32,32,128,1,fp8,fp8,0,0.033022400736808774
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,32,1,128,1,float16,float16,0,0.0451119989156723
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,32,1,128,1,float16,fp8,0,0.030880001187324525
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,32,1,128,1,fp8,fp8,0,0.030950400233268737
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,32,2,128,1,float16,float16,0,0.045023998618125914
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,32,2,128,1,float16,fp8,0,0.030995199084281923
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,32,2,128,1,fp8,fp8,0,0.030875200033187868
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,32,4,128,1,float16,float16,0,0.04528799951076508
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,32,4,128,1,float16,fp8,0,0.030697599053382874
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,32,4,128,1,fp8,fp8,0,0.030950400233268737
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,32,8,128,1,float16,float16,0,0.050147199630737306
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,32,8,128,1,float16,fp8,0,0.031068798899650574
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,32,8,128,1,fp8,fp8,0,0.03089439868927002
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,32,32,128,1,float16,float16,0,0.043750399351119997
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,32,32,128,1,float16,fp8,0,0.021583999693393707
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,32,32,128,1,fp8,fp8,0,0.021622399985790252
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,32,1,128,1,float16,float16,0,0.03194079995155334
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,32,1,128,1,float16,fp8,0,0.020505599677562714
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,32,1,128,1,fp8,fp8,0,0.020640000700950623
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,32,2,128,1,float16,float16,0,0.0317903995513916
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,32,2,128,1,float16,fp8,0,0.019913600385189058
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,32,2,128,1,fp8,fp8,0,0.020759999752044678
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,32,4,128,1,float16,float16,0,0.03301919996738434
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,32,4,128,1,float16,fp8,0,0.019806399941444397
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,32,4,128,1,fp8,fp8,0,0.020598399639129638
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,32,8,128,1,float16,float16,0,0.033118399977684024
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,32,8,128,1,float16,fp8,0,0.020262399315834047
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,32,8,128,1,fp8,fp8,0,0.020329600572586058
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,32,32,128,1,float16,float16,0,0.030374398827552794
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,32,32,128,1,float16,fp8,0,0.014584000408649444
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,32,32,128,1,fp8,fp8,0,0.014473600685596466
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,32,1,128,1,float16,float16,0,0.024748800694942473
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,32,1,128,1,float16,fp8,0,0.012559999525547028
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,32,1,128,1,fp8,fp8,0,0.012702399492263794
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,32,2,128,1,float16,float16,0,0.024784000217914583
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,32,2,128,1,float16,fp8,0,0.01390880048274994
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,32,2,128,1,fp8,fp8,0,0.014135999977588654
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,32,4,128,1,float16,float16,0,0.02475520074367523
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,32,4,128,1,float16,fp8,0,0.012726399302482604
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,32,4,128,1,fp8,fp8,0,0.012694400548934937
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,32,8,128,1,float16,float16,0,0.02476319968700409
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,32,8,128,1,float16,fp8,0,0.014441600441932679
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,32,8,128,1,fp8,fp8,0,0.013648000359535218
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,32,32,128,1,float16,float16,0,0.02066880017518997
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,32,32,128,1,float16,fp8,0,0.010467199981212616
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,32,32,128,1,fp8,fp8,0,0.010425599664449692
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,32,1,128,1,float16,float16,0,0.0186271995306015
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,32,1,128,1,float16,fp8,0,0.010422399640083313
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,32,1,128,1,fp8,fp8,0,0.01037440001964569
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,32,2,128,1,float16,float16,0,0.018636800348758698
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,32,8,128,1,float16,fp8,0,0.010435199737548828
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,32,2,128,1,float16,fp8,0,0.010465600341558457
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,32,2,128,1,fp8,fp8,0,0.010390400141477584
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,32,4,128,1,float16,float16,0,0.019812799990177155
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,32,4,128,1,float16,fp8,0,0.010596799850463866
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,32,4,128,1,fp8,fp8,0,0.01048159971833229
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,32,8,128,1,float16,float16,0,0.020632000267505647
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,32,8,128,1,fp8,fp8,0,0.010436800122261048
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,32,32,128,1,float16,float16,0,0.018596799671649934
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,32,32,128,1,float16,fp8,0,0.010377600044012069
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,32,32,128,1,fp8,fp8,0,0.010540799796581268
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,32,1,128,1,float16,float16,0,0.01857919991016388
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,32,1,128,1,float16,fp8,0,0.010384000092744827
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,32,1,128,1,fp8,fp8,0,0.010420800000429154
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,32,2,128,1,float16,float16,0,0.018588800728321076
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,32,2,128,1,float16,fp8,0,0.010440000146627427
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,32,2,128,1,fp8,fp8,0,0.009142400324344635
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,32,4,128,1,float16,float16,0,0.01868959963321686
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,32,4,128,1,fp8,fp8,0,0.010419200360774993
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,32,4,128,1,float16,fp8,0,0.010409600287675857
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,32,1,128,1,float16,float16,0,0.018676799535751343
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,32,8,128,1,float16,float16,0,0.01865759938955307
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,32,8,128,1,float16,fp8,0,0.010419200360774993
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,32,8,128,1,fp8,fp8,0,0.00958240032196045
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,32,2,128,1,float16,fp8,0,0.00875839963555336
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,32,32,128,1,float16,float16,0,0.01860959976911545
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,32,32,128,1,float16,fp8,0,0.008664000034332275
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,32,32,128,1,fp8,fp8,0,0.009081599861383438
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,32,1,128,1,float16,fp8,0,0.010372799634933472
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,32,1,128,1,fp8,fp8,0,0.010391999781131745
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,32,2,128,1,float16,float16,0,0.018688000738620758
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,32,2,128,1,fp8,fp8,0,0.008555199950933456
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,32,4,128,1,float16,float16,0,0.01860000044107437
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,32,4,128,1,float16,fp8,0,0.010454399883747101
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,32,4,128,1,fp8,fp8,0,0.00852160006761551
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,32,8,128,1,float16,float16,0,0.018695999681949616
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,32,8,128,1,float16,fp8,0,0.01040000021457672
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,32,8,128,1,fp8,fp8,0,0.010398399829864503
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,32,1,128,1,float16,float16,0,0.21540639400482178
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,32,1,128,1,float16,fp8,0,0.17135039567947388
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,32,4,128,1,float16,fp8,0,0.17114239931106567
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,32,4,128,1,fp8,fp8,0,0.17142399549484252
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,32,1,128,1,fp8,fp8,0,0.17299360036849976
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,32,2,128,1,float16,float16,0,0.2236191987991333
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,32,2,128,1,float16,fp8,0,0.17163360118865967
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,32,2,128,1,fp8,fp8,0,0.17218879461288453
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,32,4,128,1,float16,float16,0,0.23119680881500243
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,32,8,128,1,float16,float16,0,0.2540208101272583
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,32,8,128,1,float16,fp8,0,0.17144639492034913
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,32,1,128,1,float16,fp8,0,0.09054399728775024
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,32,8,128,1,fp8,fp8,0,0.1720255970954895
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,32,32,128,1,float16,float16,0,0.1955183982849121
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,32,2,128,1,float16,fp8,0,0.09079520106315613
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,32,32,128,1,float16,fp8,0,0.09727519750595093
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,32,4,128,1,float16,float16,0,0.1245535969734192
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,32,32,128,1,fp8,fp8,0,0.09730560183525086
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,32,1,128,1,float16,float16,0,0.11849759817123413
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,32,1,128,1,fp8,fp8,0,0.09130719900131226
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,32,2,128,1,float16,float16,0,0.11776479482650756
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,32,2,128,1,fp8,fp8,0,0.09109600186347962
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,32,4,128,1,float16,fp8,0,0.0906831979751587
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,32,4,128,1,fp8,fp8,0,0.0910256028175354
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,32,8,128,1,float16,float16,0,0.130457603931427
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,32,8,128,1,float16,fp8,0,0.09117280244827271
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,32,8,128,1,fp8,fp8,0,0.09129440188407897
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,32,32,128,1,float16,float16,0,0.09814559817314147
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,32,32,128,1,float16,fp8,0,0.05353279709815979
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,32,32,128,1,fp8,fp8,0,0.05348640084266663
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,32,1,128,1,float16,float16,0,0.06958879828453064
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,32,1,128,1,float16,fp8,0,0.05124319791793823
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,32,1,128,1,fp8,fp8,0,0.05097439885139465
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,32,2,128,1,float16,float16,0,0.07053120136260986
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,32,2,128,1,float16,fp8,0,0.050646400451660155
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,32,2,128,1,fp8,fp8,0,0.05092800259590149
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,32,4,128,1,float16,float16,0,0.06970080137252807
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,32,4,128,1,float16,fp8,0,0.05040799975395203
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,32,4,128,1,fp8,fp8,0,0.05116320252418518
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,32,8,128,1,float16,float16,0,0.07530080080032349
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,32,8,128,1,float16,fp8,0,0.05103840231895447
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,32,8,128,1,fp8,fp8,0,0.05045440196990967
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,32,32,128,1,float16,float16,0,0.05572800040245056
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,32,32,128,1,float16,fp8,0,0.031169599294662474
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,32,32,128,1,fp8,fp8,0,0.030972799658775328
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,32,1,128,1,float16,float16,0,0.04423039853572845
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,32,1,128,1,float16,fp8,0,0.029558399319648744
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,32,1,128,1,fp8,fp8,0,0.028942400217056276
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,32,2,128,1,float16,float16,0,0.0454255998134613
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,32,2,128,1,float16,fp8,0,0.029257598519325256
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,32,2,128,1,fp8,fp8,0,0.028921601176261903
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,32,4,128,1,float16,float16,0,0.04528320133686066
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,32,4,128,1,float16,fp8,0,0.02922239899635315
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,32,4,128,1,fp8,fp8,0,0.030480000376701354
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,32,8,128,1,float16,float16,0,0.045311999320983884
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,32,8,128,1,float16,fp8,0,0.029100799560546876
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,32,8,128,1,fp8,fp8,0,0.03033120036125183
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,32,32,128,1,float16,float16,0,0.036601600050926206
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,32,32,128,1,float16,fp8,0,0.020638400316238405
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,32,32,128,1,fp8,fp8,0,0.020667199790477753
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,32,1,128,1,float16,fp8,0,0.019627200067043306
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,32,1,128,1,float16,float16,0,0.03091199994087219
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,32,1,128,1,fp8,fp8,0,0.019519999623298645
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,32,2,128,1,float16,float16,0,0.030987200140953065
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,32,2,128,1,float16,fp8,0,0.01964000016450882
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,32,2,128,1,fp8,fp8,0,0.019280000030994414
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,32,4,128,1,float16,float16,0,0.03089280128479004
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,32,4,128,1,float16,fp8,0,0.018697600066661834
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,32,4,128,1,fp8,fp8,0,0.019417600333690645
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,32,8,128,1,float16,float16,0,0.030955201387405394
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,32,8,128,1,float16,fp8,0,0.019409599900245666
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,32,8,128,1,fp8,fp8,0,0.019041599333286287
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,32,32,128,1,float16,fp8,0,0.014504000544548035
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,32,32,128,1,float16,float16,0,0.0239424005150795
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,32,32,128,1,fp8,fp8,0,0.013396799564361572
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,32,1,128,1,float16,float16,0,0.02271360009908676
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,32,1,128,1,float16,fp8,0,0.012564800679683685
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,32,1,128,1,fp8,fp8,0,0.012465599924325943
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,32,2,128,1,float16,float16,0,0.02276639938354492
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,32,2,128,1,float16,fp8,0,0.013143999874591828
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,32,2,128,1,fp8,fp8,0,0.013076800107955932
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,32,4,128,1,float16,float16,0,0.022779199481010436
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,32,4,128,1,float16,fp8,0,0.012608000636100769
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,32,4,128,1,fp8,fp8,0,0.012915199995040894
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,32,8,128,1,float16,float16,0,0.022776000201702118
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,32,8,128,1,float16,fp8,0,0.012452799826860428
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,32,8,128,1,fp8,fp8,0,0.012516799569129943
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,32,32,128,1,float16,float16,0,0.02070080041885376
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,32,32,128,1,float16,fp8,0,0.0104032002389431
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,32,32,128,1,fp8,fp8,0,0.010470400005578995
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,32,1,128,1,float16,float16,0,0.018779200315475465
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,32,1,128,1,float16,fp8,0,0.010440000146627427
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,32,1,128,1,fp8,fp8,0,0.010414399951696397
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,32,2,128,1,float16,float16,0,0.018796800076961516
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,32,2,128,1,float16,fp8,0,0.010369600355625152
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,32,2,128,1,fp8,fp8,0,0.010463999956846238
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,32,4,128,1,float16,float16,0,0.01870719939470291
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,32,4,128,1,float16,fp8,0,0.010496000200510025
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,32,4,128,1,fp8,fp8,0,0.01045759990811348
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,32,8,128,1,float16,fp8,0,0.010496000200510025
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,32,1,128,1,float16,fp8,0,0.010356800258159637
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,32,8,128,1,float16,float16,0,0.020664000511169435
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,32,8,128,1,fp8,fp8,0,0.010395199805498124
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,32,32,128,1,float16,float16,0,0.018636800348758698
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,32,2,128,1,fp8,fp8,0,0.010385599732398988
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,32,32,128,1,float16,fp8,0,0.010470400005578995
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,32,32,128,1,fp8,fp8,0,0.009993600100278855
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,32,1,128,1,float16,float16,0,0.018654400110244752
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,32,1,128,1,fp8,fp8,0,0.010390400141477584
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,32,2,128,1,float16,float16,0,0.01866399943828583
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,32,8,128,1,fp8,fp8,0,0.010419200360774993
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,32,2,128,1,float16,fp8,0,0.010355199873447418
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,32,4,128,1,float16,fp8,0,0.010478399693965912
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,32,32,128,1,fp8,fp8,0,0.00878399983048439
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,32,4,128,1,float16,float16,0,0.018566399812698364
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,32,4,128,1,fp8,fp8,0,0.010468800365924836
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,32,8,128,1,float16,float16,0,0.01870879977941513
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,32,8,128,1,float16,fp8,0,0.009424000233411788
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,32,32,128,1,float16,float16,0,0.01857440024614334
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,32,32,128,1,float16,fp8,0,0.0095040000975132
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,32,1,128,1,float16,float16,0,0.0186256006360054
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,32,1,128,1,float16,fp8,0,0.008584000170230865
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,32,1,128,1,fp8,fp8,0,0.009391999989748
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,32,2,128,1,float16,float16,0,0.018620799481868743
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,32,2,128,1,float16,fp8,0,0.008463999629020691
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,32,2,128,1,fp8,fp8,0,0.008560000360012055
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,32,8,128,1,fp8,fp8,0,0.009200000017881394
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,32,8,128,1,float16,fp8,0,0.010396800190210342
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,32,4,128,1,float16,float16,0,0.018695999681949616
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,32,4,128,1,float16,fp8,0,0.008684799820184708
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,32,4,128,1,fp8,fp8,0,0.0104032002389431
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,32,8,128,1,float16,float16,0,0.01855199933052063
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,24,1,128,1,float16,fp8,0,8.679096221923828
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,24,1,128,1,fp8,fp8,0,8.64188003540039
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,24,2,128,1,float16,fp8,0,8.63686752319336
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,24,2,128,1,fp8,fp8,0,8.667237091064454
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,24,1,128,1,float16,float16,0,10.948092651367187
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,24,2,128,1,float16,float16,0,11.086262512207032
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,24,4,128,1,float16,float16,0,11.407628631591797
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,24,4,128,1,float16,fp8,0,8.699578857421875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,24,4,128,1,fp8,fp8,0,8.80859375
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,24,8,128,1,float16,fp8,0,8.792829132080078
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,24,8,128,1,fp8,fp8,0,8.87242431640625
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,24,24,128,1,float16,float16,0,7.3051597595214846
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,24,24,128,1,float16,fp8,0,4.538927841186523
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,24,1,128,1,float16,float16,0,5.532753753662109
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,24,24,128,1,fp8,fp8,0,4.58105583190918
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,24,8,128,1,float16,float16,0,12.212742614746094
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,24,1,128,1,float16,fp8,0,4.445267105102539
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,24,1,128,1,fp8,fp8,0,4.4555103302001955
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,24,2,128,1,fp8,fp8,0,4.324934387207032
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,24,2,128,1,float16,fp8,0,4.536780929565429
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,24,2,128,1,float16,float16,0,5.815630340576172
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,24,4,128,1,float16,fp8,0,4.3318321228027346
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,24,4,128,1,float16,float16,0,5.872843170166016
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,24,4,128,1,fp8,fp8,0,4.752764892578125
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,24,24,128,1,fp8,fp8,0,2.2820144653320313
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,24,24,128,1,float16,float16,0,3.553083038330078
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,24,24,128,1,float16,fp8,0,2.61297607421875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,24,8,128,1,float16,fp8,0,4.449744033813476
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,24,8,128,1,fp8,fp8,0,4.353401565551758
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,24,8,128,1,float16,float16,0,5.933422470092774
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,24,1,128,1,float16,float16,0,2.9856847763061523
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,24,1,128,1,float16,fp8,0,2.3015792846679686
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,24,1,128,1,fp8,fp8,0,2.2096799850463866
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,24,2,128,1,float16,fp8,0,2.2750431060791017
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,24,2,128,1,float16,float16,0,2.7269039154052734
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,24,2,128,1,fp8,fp8,0,2.3118480682373046
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,24,4,128,1,float16,float16,0,2.8189071655273437
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,24,4,128,1,float16,fp8,0,2.472060775756836
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,24,4,128,1,fp8,fp8,0,2.5655887603759764
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,24,24,128,1,float16,fp8,0,1.2161359786987305
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,24,8,128,1,float16,fp8,0,2.2113264083862303
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,24,8,128,1,float16,float16,0,2.950187110900879
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,24,24,128,1,float16,float16,0,2.0887872695922853
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,24,8,128,1,fp8,fp8,0,2.221196746826172
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,24,24,128,1,fp8,fp8,0,1.5476351737976075
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,24,1,128,1,float16,float16,0,1.3496047973632812
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,24,1,128,1,float16,fp8,0,1.28897123336792
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,24,1,128,1,fp8,fp8,0,1.1601072311401368
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,24,2,128,1,float16,float16,0,1.3729311943054199
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,24,2,128,1,float16,fp8,0,1.4317616462707519
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,24,2,128,1,fp8,fp8,0,1.3442399978637696
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,24,4,128,1,float16,float16,0,1.3999247550964355
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,24,4,128,1,float16,fp8,0,1.3102047920227051
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,24,4,128,1,fp8,fp8,0,1.271123218536377
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,24,8,128,1,float16,fp8,0,1.1554431915283203
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,24,8,128,1,float16,float16,0,1.4729727745056151
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,24,8,128,1,fp8,fp8,0,1.3594271659851074
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,24,1,128,1,float16,fp8,0,4.974508666992188
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,24,2,128,1,float16,fp8,0,5.067270278930664
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,24,1,128,1,fp8,fp8,0,5.184523010253907
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,24,2,128,1,fp8,fp8,0,5.072588729858398
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,24,1,128,1,float16,float16,0,6.3143455505371096
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,24,4,128,1,float16,fp8,0,5.082815933227539
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,24,2,128,1,float16,float16,0,6.377841567993164
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,24,4,128,1,float16,float16,0,6.671878051757813
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,24,24,128,1,float16,fp8,0,2.6504543304443358
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,24,24,128,1,fp8,fp8,0,3.093100738525391
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,24,4,128,1,fp8,fp8,0,5.0960430145263675
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,24,1,128,1,float16,float16,0,3.2704494476318358
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,24,24,128,1,float16,float16,0,4.457054519653321
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,24,8,128,1,float16,fp8,0,5.01519660949707
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,24,8,128,1,fp8,fp8,0,5.0938465118408205
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,24,1,128,1,float16,fp8,0,2.519766426086426
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,24,1,128,1,fp8,fp8,0,2.539396858215332
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,24,8,128,1,float16,float16,0,7.066572570800782
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,24,2,128,1,fp8,fp8,0,2.608700752258301
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,24,2,128,1,float16,float16,0,3.176665687561035
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,24,4,128,1,float16,fp8,0,2.5681119918823243
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,24,2,128,1,float16,fp8,0,3.121246337890625
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,24,4,128,1,float16,float16,0,3.2063087463378905
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,24,4,128,1,fp8,fp8,0,2.54257755279541
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,24,24,128,1,float16,fp8,0,1.383801555633545
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,24,8,128,1,float16,fp8,0,2.870731163024902
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,24,24,128,1,fp8,fp8,0,1.3528688430786133
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,24,8,128,1,float16,float16,0,3.5132366180419923
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,24,1,128,1,float16,float16,0,1.7552879333496094
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,24,1,128,1,float16,fp8,0,1.3107616424560546
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,24,8,128,1,fp8,fp8,0,2.5485872268676757
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,24,24,128,1,float16,float16,0,2.4974319458007814
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,24,1,128,1,fp8,fp8,0,1.29749755859375
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,24,2,128,1,float16,fp8,0,1.3038000106811523
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,24,2,128,1,float16,float16,0,1.5471023559570312
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,24,4,128,1,float16,fp8,0,1.297590446472168
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,24,2,128,1,fp8,fp8,0,1.6041391372680665
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,24,4,128,1,fp8,fp8,0,1.3251631736755372
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,24,4,128,1,float16,float16,0,1.6968704223632813
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,24,8,128,1,float16,float16,0,1.7247968673706056
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,24,24,128,1,float16,fp8,0,0.8150704383850098
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,24,8,128,1,float16,fp8,0,1.2966511726379395
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,24,24,128,1,fp8,fp8,0,0.728878402709961
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,24,24,128,1,float16,float16,0,1.1368448257446289
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,24,1,128,1,float16,float16,0,0.8477071762084961
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,24,8,128,1,fp8,fp8,0,1.402905559539795
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,24,1,128,1,float16,fp8,0,0.7422128200531006
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,24,1,128,1,fp8,fp8,0,0.7344223976135253
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,24,2,128,1,float16,float16,0,0.8084464073181152
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,24,2,128,1,float16,fp8,0,0.6995744228363037
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,24,2,128,1,fp8,fp8,0,0.6974048137664794
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,24,4,128,1,float16,fp8,0,0.7098336219787598
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,24,4,128,1,float16,float16,0,0.8896736145019531
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,24,4,128,1,fp8,fp8,0,0.7562399864196777
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,24,8,128,1,float16,float16,0,0.9032832145690918
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,24,8,128,1,float16,fp8,0,0.7677152156829834
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,24,8,128,1,fp8,fp8,0,0.6971280097961425
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,24,1,128,1,float16,fp8,0,3.543716812133789
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,24,1,128,1,fp8,fp8,0,3.6282798767089846
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,24,2,128,1,float16,fp8,0,3.567428970336914
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,24,2,128,1,fp8,fp8,0,3.5880496978759764
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,24,1,128,1,float16,float16,0,4.523648071289062
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,24,2,128,1,float16,float16,0,4.433287811279297
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,24,4,128,1,float16,float16,0,4.674673461914063
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,24,4,128,1,float16,fp8,0,3.5197265625
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,24,24,128,1,float16,fp8,0,2.3815488815307617
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,24,4,128,1,fp8,fp8,0,3.5804080963134766
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,24,24,128,1,fp8,fp8,0,2.1183984756469725
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,24,8,128,1,float16,fp8,0,3.5567726135253905
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,24,1,128,1,float16,float16,0,2.073294448852539
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,24,8,128,1,fp8,fp8,0,3.5673919677734376
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,24,24,128,1,float16,float16,0,3.514271926879883
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,24,8,128,1,float16,float16,0,5.050640106201172
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,24,1,128,1,float16,fp8,0,1.8443248748779297
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,24,1,128,1,fp8,fp8,0,1.8662847518920898
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,24,2,128,1,float16,fp8,0,1.8408960342407226
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,24,2,128,1,float16,float16,0,2.1252288818359375
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,24,2,128,1,fp8,fp8,0,2.116537666320801
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,24,4,128,1,float16,fp8,0,1.8641263961791992
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,24,4,128,1,float16,float16,0,2.2887359619140626
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,24,4,128,1,fp8,fp8,0,2.087233543395996
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,24,8,128,1,float16,fp8,0,1.795929527282715
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,24,24,128,1,float16,fp8,0,1.0145296096801757
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,24,24,128,1,fp8,fp8,0,0.9956496238708497
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,24,8,128,1,fp8,fp8,0,1.8015792846679688
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,24,8,128,1,float16,float16,0,2.578023910522461
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,24,1,128,1,float16,float16,0,1.1902000427246093
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,24,24,128,1,float16,float16,0,2.000351905822754
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,24,1,128,1,fp8,fp8,0,0.9552399635314941
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,24,1,128,1,float16,fp8,0,1.0885583877563476
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,24,2,128,1,float16,fp8,0,0.9375167846679687
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,24,2,128,1,float16,float16,0,1.1048992156982422
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,24,2,128,1,fp8,fp8,0,0.9504575729370117
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,24,4,128,1,float16,fp8,0,1.0885760307312011
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,24,4,128,1,float16,float16,0,1.2657391548156738
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,24,4,128,1,fp8,fp8,0,1.1918448448181151
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,24,24,128,1,float16,fp8,0,0.5366608142852783
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,24,8,128,1,float16,fp8,0,0.9372847557067872
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,24,8,128,1,fp8,fp8,0,0.9358575820922852
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,24,8,128,1,float16,float16,0,1.2564528465270997
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,24,24,128,1,float16,float16,0,0.87991361618042
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,24,24,128,1,fp8,fp8,0,0.5853392124176026
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,24,1,128,1,float16,fp8,0,0.5189280033111572
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,24,1,128,1,float16,float16,0,0.6337215900421143
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,24,1,128,1,fp8,fp8,0,0.5095424175262451
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,24,2,128,1,fp8,fp8,0,0.5924560070037842
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,24,2,128,1,float16,fp8,0,0.5109856128692627
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,24,2,128,1,float16,float16,0,0.5864736080169678
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,24,8,128,1,float16,float16,0,0.7018352031707764
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,24,8,128,1,float16,fp8,0,0.5097648143768311
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,24,4,128,1,float16,float16,0,0.6199600219726562
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,24,4,128,1,float16,fp8,0,0.5217872142791748
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,24,4,128,1,fp8,fp8,0,0.5198527812957764
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,24,8,128,1,fp8,fp8,0,0.5476687908172607
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,24,1,128,1,float16,fp8,0,4.654375839233398
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,24,1,128,1,fp8,fp8,0,4.653542327880859
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,24,2,128,1,float16,fp8,0,4.631422424316407
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,24,2,128,1,fp8,fp8,0,4.660924911499023
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,24,1,128,1,float16,float16,0,5.770273590087891
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,24,4,128,1,float16,fp8,0,4.693783950805664
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,24,2,128,1,float16,float16,0,5.997150421142578
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,24,4,128,1,float16,float16,0,6.1025230407714846
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,24,24,128,1,float16,fp8,0,2.547491264343262
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,24,24,128,1,fp8,fp8,0,2.8004016876220703
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,24,4,128,1,fp8,fp8,0,4.710531234741211
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,24,1,128,1,float16,float16,0,2.9964672088623048
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,24,8,128,1,float16,fp8,0,4.686072158813476
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,24,8,128,1,fp8,fp8,0,4.6851856231689455
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,24,24,128,1,float16,float16,0,4.946247863769531
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,24,1,128,1,float16,fp8,0,2.3349103927612305
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,24,1,128,1,fp8,fp8,0,2.39890079498291
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,24,8,128,1,float16,float16,0,6.919795227050781
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,24,2,128,1,float16,float16,0,2.8882831573486327
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,24,2,128,1,fp8,fp8,0,2.3601919174194337
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,24,2,128,1,float16,fp8,0,2.7401023864746095
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,24,4,128,1,float16,fp8,0,2.3351104736328123
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,24,4,128,1,float16,float16,0,3.0222272872924805
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,24,4,128,1,fp8,fp8,0,2.3519039154052734
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,24,24,128,1,float16,fp8,0,1.2713423728942872
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,24,8,128,1,float16,fp8,0,2.632908821105957
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,24,1,128,1,float16,float16,0,1.3873536109924316
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,24,8,128,1,float16,float16,0,3.321985626220703
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,24,24,128,1,fp8,fp8,0,1.5656144142150878
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,24,24,128,1,float16,float16,0,2.293844795227051
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,24,8,128,1,fp8,fp8,0,2.334628868103027
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,24,1,128,1,float16,fp8,0,1.1952336311340332
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,24,1,128,1,fp8,fp8,0,1.2840559959411622
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,24,2,128,1,float16,float16,0,1.4450112342834474
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,24,2,128,1,float16,fp8,0,1.4286815643310546
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,24,2,128,1,fp8,fp8,0,1.2840543746948243
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,24,4,128,1,float16,fp8,0,1.219153594970703
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,24,4,128,1,float16,float16,0,1.5031807899475098
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,24,4,128,1,fp8,fp8,0,1.1952351570129394
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,24,8,128,1,float16,float16,0,1.6660512924194335
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,24,24,128,1,float16,fp8,0,0.6717472076416016
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,24,24,128,1,fp8,fp8,0,0.7885536193847656
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,24,8,128,1,fp8,fp8,0,1.302187156677246
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,24,8,128,1,float16,fp8,0,1.5123488426208496
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,24,1,128,1,float16,float16,0,0.7201488018035889
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,24,1,128,1,fp8,fp8,0,0.6984208106994629
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,24,24,128,1,float16,float16,0,1.182859230041504
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,24,1,128,1,float16,fp8,0,0.6322288036346435
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,24,2,128,1,float16,float16,0,0.7411168098449707
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,24,2,128,1,float16,fp8,0,0.7465040206909179
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,24,2,128,1,fp8,fp8,0,0.6635168075561524
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,24,4,128,1,float16,fp8,0,0.6331664085388183
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,24,4,128,1,float16,float16,0,0.8200799942016601
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,24,8,128,1,float16,float16,0,0.8571056365966797
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,24,4,128,1,fp8,fp8,0,0.6991136074066162
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,24,8,128,1,float16,fp8,0,0.6338335990905761
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,24,8,128,1,fp8,fp8,0,0.633516788482666
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,24,24,128,1,float16,fp8,0,0.37077438831329346
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,24,24,128,1,float16,float16,0,0.6398176193237305
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,24,24,128,1,fp8,fp8,0,0.37090721130371096
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,24,1,128,1,float16,float16,0,0.40623040199279786
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,24,1,128,1,float16,fp8,0,0.35086081027984617
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,24,1,128,1,fp8,fp8,0,0.353657603263855
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,24,2,128,1,float16,float16,0,0.40272960662841795
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,24,2,128,1,float16,fp8,0,0.35319199562072756
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,24,2,128,1,fp8,fp8,0,0.35040318965911865
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,24,4,128,1,float16,float16,0,0.42529921531677245
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,24,4,128,1,float16,fp8,0,0.35123519897460936
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,24,4,128,1,fp8,fp8,0,0.35312159061431886
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,24,8,128,1,float16,float16,0,0.46408638954162595
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,24,8,128,1,float16,fp8,0,0.3514640092849731
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,24,8,128,1,fp8,fp8,0,0.3523967981338501
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,24,1,128,1,float16,fp8,0,2.7121103286743162
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,24,1,128,1,fp8,fp8,0,2.7126592636108398
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,24,1,128,1,float16,float16,0,3.204198455810547
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,24,2,128,1,float16,fp8,0,2.7151824951171877
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,24,2,128,1,fp8,fp8,0,2.7248464584350587
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,24,2,128,1,float16,float16,0,3.288355255126953
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,24,4,128,1,float16,fp8,0,2.728830337524414
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,24,4,128,1,float16,float16,0,3.6807487487792967
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,24,24,128,1,fp8,fp8,0,1.5165264129638671
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,24,4,128,1,fp8,fp8,0,2.7678688049316404
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,24,24,128,1,float16,fp8,0,1.7994560241699218
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,24,8,128,1,fp8,fp8,0,2.776371192932129
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,24,1,128,1,float16,float16,0,1.6208751678466797
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,24,24,128,1,float16,float16,0,2.9634048461914064
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,24,8,128,1,float16,fp8,0,3.3597648620605467
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,24,8,128,1,float16,float16,0,4.077998352050781
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,24,1,128,1,float16,fp8,0,1.3870384216308593
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,24,1,128,1,fp8,fp8,0,1.393777561187744
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,24,2,128,1,float16,float16,0,1.655556869506836
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,24,2,128,1,float16,fp8,0,1.429753589630127
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,24,2,128,1,fp8,fp8,0,1.795684814453125
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,24,4,128,1,float16,fp8,0,1.454587173461914
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,24,4,128,1,float16,float16,0,1.779324722290039
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,24,4,128,1,fp8,fp8,0,1.5502592086791993
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,24,8,128,1,float16,fp8,0,1.3854880332946777
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,24,24,128,1,float16,fp8,0,0.873532772064209
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,24,24,128,1,fp8,fp8,0,0.7870816230773926
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,24,24,128,1,float16,float16,0,1.5094351768493652
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,24,8,128,1,float16,float16,0,2.007151985168457
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,24,1,128,1,float16,float16,0,0.8546832084655762
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,24,8,128,1,fp8,fp8,0,1.6907567977905273
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,24,1,128,1,float16,fp8,0,0.8059103965759278
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,24,1,128,1,fp8,fp8,0,0.7200208187103272
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,24,2,128,1,float16,fp8,0,0.7231296062469482
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,24,2,128,1,float16,float16,0,0.8588687896728515
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,24,2,128,1,fp8,fp8,0,0.7207632064819336
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,24,4,128,1,float16,float16,0,0.926478385925293
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,24,4,128,1,float16,fp8,0,0.7857632160186767
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,24,4,128,1,fp8,fp8,0,0.7562064170837403
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,24,8,128,1,float16,fp8,0,0.7216063976287842
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,24,8,128,1,float16,float16,0,1.031438446044922
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,24,8,128,1,fp8,fp8,0,0.7230447769165039
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,24,24,128,1,float16,fp8,0,0.4158656120300293
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,24,24,128,1,fp8,fp8,0,0.4326464176177979
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,24,24,128,1,float16,float16,0,0.8109840393066406
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,24,2,128,1,float16,float16,0,0.45241918563842776
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,24,1,128,1,float16,float16,0,0.4481808185577393
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,24,1,128,1,float16,fp8,0,0.3886480093002319
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,24,1,128,1,fp8,fp8,0,0.3880079984664917
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,24,2,128,1,float16,fp8,0,0.3894272089004517
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,24,2,128,1,fp8,fp8,0,0.38801600933074953
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,24,4,128,1,float16,float16,0,0.48226242065429686
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,24,4,128,1,float16,fp8,0,0.3974143981933594
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,24,4,128,1,fp8,fp8,0,0.3887167930603027
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,24,8,128,1,float16,float16,0,0.5417200088500976
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,24,8,128,1,float16,fp8,0,0.3884880065917969
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,24,8,128,1,fp8,fp8,0,0.39014239311218263
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,24,24,128,1,float16,float16,0,0.421124792098999
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,24,24,128,1,float16,fp8,0,0.23790559768676758
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,24,24,128,1,fp8,fp8,0,0.2388144016265869
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,24,1,128,1,float16,float16,0,0.24320800304412843
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,24,1,128,1,float16,fp8,0,0.2221343994140625
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,24,1,128,1,fp8,fp8,0,0.22207839488983155
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,24,4,128,1,float16,float16,0,0.26627840995788576
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,24,2,128,1,float16,float16,0,0.2532383918762207
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,24,2,128,1,float16,fp8,0,0.22153759002685547
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,24,2,128,1,fp8,fp8,0,0.22187840938568115
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,24,4,128,1,float16,fp8,0,0.2221776008605957
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,24,4,128,1,fp8,fp8,0,0.22254400253295897
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,24,8,128,1,float16,float16,0,0.30101919174194336
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,24,8,128,1,float16,fp8,0,0.2232896089553833
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,24,8,128,1,fp8,fp8,0,0.22361600399017334
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,24,1,128,1,float16,fp8,0,2.633742332458496
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,24,1,128,1,float16,float16,0,3.0615135192871095
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,24,1,128,1,fp8,fp8,0,2.6284032821655274
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,24,2,128,1,float16,fp8,0,2.6265871047973635
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,24,2,128,1,fp8,fp8,0,2.627903938293457
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,24,2,128,1,float16,float16,0,3.2649520874023437
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,24,4,128,1,float16,fp8,0,2.632313537597656
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,24,4,128,1,float16,float16,0,3.5787727355957033
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,24,4,128,1,fp8,fp8,0,2.6393680572509766
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,24,24,128,1,float16,fp8,0,1.7630960464477539
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,24,8,128,1,float16,fp8,0,2.628945541381836
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,24,8,128,1,fp8,fp8,0,2.632206344604492
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,24,24,128,1,fp8,fp8,0,1.4790351867675782
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,24,1,128,1,float16,float16,0,1.6563039779663087
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,24,24,128,1,float16,float16,0,3.340081787109375
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,24,1,128,1,float16,fp8,0,1.3442319869995116
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,24,1,128,1,fp8,fp8,0,1.3442399978637696
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,24,8,128,1,float16,float16,0,4.423353576660157
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,24,2,128,1,float16,fp8,0,1.34225435256958
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,24,2,128,1,float16,float16,0,1.6115888595581054
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,24,2,128,1,fp8,fp8,0,1.3368895530700684
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,24,4,128,1,float16,fp8,0,1.3345536231994628
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,24,4,128,1,float16,float16,0,1.893052864074707
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,24,4,128,1,fp8,fp8,0,1.336684799194336
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,24,24,128,1,float16,fp8,0,0.7602015972137451
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,24,8,128,1,float16,fp8,0,1.3895968437194823
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,24,8,128,1,fp8,fp8,0,1.3351648330688477
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,24,24,128,1,fp8,fp8,0,0.7586639881134033
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,24,8,128,1,float16,float16,0,2.08544807434082
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,24,1,128,1,float16,float16,0,0.7928160190582275
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,24,1,128,1,float16,fp8,0,0.688801622390747
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,24,24,128,1,float16,float16,0,1.7677696228027344
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,24,1,128,1,fp8,fp8,0,0.6891183853149414
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,24,2,128,1,float16,fp8,0,0.6912528038024902
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,24,2,128,1,float16,float16,0,0.8266400337219239
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,24,2,128,1,fp8,fp8,0,0.6877327919006347
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,24,4,128,1,float16,fp8,0,0.7403711795806884
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,24,4,128,1,float16,float16,0,0.9068096160888672
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,24,4,128,1,fp8,fp8,0,0.689024019241333
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,24,8,128,1,fp8,fp8,0,0.6885359764099122
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,24,24,128,1,float16,fp8,0,0.4005263805389404
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,24,8,128,1,float16,fp8,0,0.6898863792419434
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,24,8,128,1,float16,float16,0,1.0598272323608398
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,24,24,128,1,fp8,fp8,0,0.4126880168914795
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,24,24,128,1,float16,float16,0,0.8696720123291015
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,24,1,128,1,float16,float16,0,0.41529598236083987
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,24,1,128,1,float16,fp8,0,0.36533119678497317
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,24,1,128,1,fp8,fp8,0,0.3650752067565918
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,24,2,128,1,float16,float16,0,0.4283455848693848
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,24,2,128,1,float16,fp8,0,0.3654864072799683
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,24,2,128,1,fp8,fp8,0,0.36689119338989257
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,24,4,128,1,float16,float16,0,0.47179360389709474
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,24,4,128,1,float16,fp8,0,0.3652447938919067
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,24,4,128,1,fp8,fp8,0,0.3699647903442383
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,24,8,128,1,float16,fp8,0,0.3650752067565918
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,24,8,128,1,float16,float16,0,0.5479536056518555
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,24,8,128,1,fp8,fp8,0,0.3651952028274536
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,24,24,128,1,float16,fp8,0,0.22175838947296142
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,24,24,128,1,float16,float16,0,0.45789279937744143
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,24,24,128,1,fp8,fp8,0,0.22363040447235108
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,24,1,128,1,float16,float16,0,0.2292720079421997
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,24,1,128,1,float16,fp8,0,0.20277600288391112
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,24,1,128,1,fp8,fp8,0,0.20169599056243898
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,24,4,128,1,fp8,fp8,0,0.2027888059616089
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,24,2,128,1,float16,float16,0,0.23803360462188722
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,24,2,128,1,float16,fp8,0,0.20297279357910156
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,24,2,128,1,fp8,fp8,0,0.20214240550994872
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,24,4,128,1,float16,float16,0,0.25923359394073486
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,24,4,128,1,float16,fp8,0,0.20442719459533693
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,24,8,128,1,float16,float16,0,0.298308801651001
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,24,8,128,1,float16,fp8,0,0.20295519828796388
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,24,8,128,1,fp8,fp8,0,0.20383200645446778
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,24,24,128,1,float16,float16,0,0.2540575981140137
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,24,2,128,1,float16,float16,0,0.13990399837493897
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,24,24,128,1,float16,fp8,0,0.13042399883270264
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,24,24,128,1,fp8,fp8,0,0.12985440492630004
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,24,1,128,1,float16,float16,0,0.13548959493637086
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,24,1,128,1,float16,fp8,0,0.12216000556945801
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,24,1,128,1,fp8,fp8,0,0.12144320011138916
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,24,2,128,1,float16,fp8,0,0.12119840383529663
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,24,2,128,1,fp8,fp8,0,0.12175359725952148
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,24,4,128,1,float16,float16,0,0.14642239809036256
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,24,4,128,1,float16,fp8,0,0.12132799625396729
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,24,4,128,1,fp8,fp8,0,0.12177120447158814
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,24,8,128,1,float16,float16,0,0.1649791955947876
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,24,8,128,1,float16,fp8,0,0.1214400053024292
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,24,8,128,1,fp8,fp8,0,0.12165759801864624
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,24,1,128,1,float16,fp8,0,1.6131359100341798
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,24,1,128,1,float16,float16,0,1.8180448532104492
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,24,1,128,1,fp8,fp8,0,1.6112432479858398
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,24,2,128,1,float16,fp8,0,1.611017608642578
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,24,2,128,1,float16,float16,0,1.9736207962036132
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,24,2,128,1,fp8,fp8,0,1.6108495712280273
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,24,4,128,1,float16,float16,0,2.203545570373535
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,24,4,128,1,float16,fp8,0,1.6112079620361328
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,24,4,128,1,fp8,fp8,0,1.855691146850586
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,24,8,128,1,float16,fp8,0,1.6124271392822265
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,24,24,128,1,float16,fp8,0,0.9762432098388671
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,24,8,128,1,fp8,fp8,0,1.7741840362548829
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,24,8,128,1,float16,float16,0,2.669592094421387
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,24,24,128,1,fp8,fp8,0,0.9270655632019043
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,24,24,128,1,float16,float16,0,2.2923568725585937
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,24,1,128,1,float16,float16,0,0.9377440452575684
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,24,1,128,1,float16,fp8,0,0.8261024475097656
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,24,1,128,1,fp8,fp8,0,0.8385552406311035
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,24,2,128,1,float16,fp8,0,0.8531295776367187
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,24,2,128,1,float16,float16,0,0.9896335601806641
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,24,2,128,1,fp8,fp8,0,0.8222751617431641
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,24,4,128,1,float16,fp8,0,0.8609007835388184
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,24,4,128,1,float16,float16,0,1.1062159538269043
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,24,4,128,1,fp8,fp8,0,0.8231360435485839
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,24,24,128,1,float16,fp8,0,0.4806047916412354
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,24,8,128,1,float16,fp8,0,0.8317440032958985
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,24,1,128,1,float16,float16,0,0.4900559902191162
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,24,8,128,1,fp8,fp8,0,0.824015998840332
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,24,24,128,1,fp8,fp8,0,0.4840847969055176
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,24,1,128,1,float16,fp8,0,0.4286992073059082
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,24,8,128,1,float16,float16,0,1.3535391807556152
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,24,24,128,1,float16,float16,0,1.1929871559143066
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,24,1,128,1,fp8,fp8,0,0.4277071952819824
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,24,2,128,1,float16,fp8,0,0.448360013961792
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,24,2,128,1,float16,float16,0,0.5106063842773437
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,24,2,128,1,fp8,fp8,0,0.4282671928405762
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,24,4,128,1,float16,float16,0,0.5675519943237305
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,24,4,128,1,float16,fp8,0,0.4292912006378174
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,24,8,128,1,fp8,fp8,0,0.4424431800842285
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,24,4,128,1,fp8,fp8,0,0.4281167984008789
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,24,8,128,1,float16,fp8,0,0.4282688140869141
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,24,8,128,1,float16,float16,0,0.6905312061309814
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,24,24,128,1,float16,fp8,0,0.25812320709228515
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,24,24,128,1,float16,float16,0,0.6016608238220215
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,24,24,128,1,fp8,fp8,0,0.25906240940093994
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,24,1,128,1,float16,float16,0,0.2642751932144165
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,24,1,128,1,float16,fp8,0,0.23503680229187013
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,24,1,128,1,fp8,fp8,0,0.23068480491638182
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,24,2,128,1,float16,float16,0,0.27496159076690674
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,24,2,128,1,float16,fp8,0,0.2319711923599243
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,24,2,128,1,fp8,fp8,0,0.23063039779663086
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,24,4,128,1,float16,float16,0,0.3041359901428223
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,24,4,128,1,float16,fp8,0,0.2313551902770996
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,24,4,128,1,fp8,fp8,0,0.23262081146240235
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,24,8,128,1,float16,float16,0,0.36607999801635743
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,24,8,128,1,float16,fp8,0,0.23149919509887695
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,24,8,128,1,fp8,fp8,0,0.23198881149291992
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,24,24,128,1,float16,float16,0,0.32409279346466063
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,24,24,128,1,float16,fp8,0,0.14683519601821898
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,24,24,128,1,fp8,fp8,0,0.14673759937286376
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,24,1,128,1,float16,float16,0,0.14914079904556274
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,24,1,128,1,float16,fp8,0,0.13341439962387086
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,24,1,128,1,fp8,fp8,0,0.13161120414733887
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,24,2,128,1,float16,float16,0,0.15648800134658813
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,24,2,128,1,float16,fp8,0,0.13200479745864868
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,24,2,128,1,fp8,fp8,0,0.13219519853591918
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,24,4,128,1,float16,float16,0,0.16951839923858641
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,24,4,128,1,float16,fp8,0,0.13170239925384522
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,24,4,128,1,fp8,fp8,0,0.13193119764328004
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,24,8,128,1,float16,float16,0,0.207260799407959
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,24,8,128,1,float16,fp8,0,0.1324031949043274
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,24,8,128,1,fp8,fp8,0,0.13251999616622925
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,24,24,128,1,float16,float16,0,0.1853327989578247
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,24,24,128,1,float16,fp8,0,0.09035840034484863
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,24,2,128,1,float16,fp8,0,0.08352479934692383
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,24,24,128,1,fp8,fp8,0,0.09036160111427308
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,24,1,128,1,float16,float16,0,0.09630079865455628
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,24,1,128,1,float16,fp8,0,0.08341280221939087
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,24,1,128,1,fp8,fp8,0,0.08428320288658142
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,24,8,128,1,float16,float16,0,0.11477119922637939
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,24,2,128,1,float16,float16,0,0.10018080472946167
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,24,2,128,1,fp8,fp8,0,0.08324480056762695
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,24,4,128,1,float16,float16,0,0.10486880540847779
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,24,4,128,1,float16,fp8,0,0.08372319936752319
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,24,4,128,1,fp8,fp8,0,0.08389599919319153
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,24,8,128,1,float16,fp8,0,0.08330079913139343
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,24,8,128,1,fp8,fp8,0,0.08395519852638245
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,24,1,128,1,float16,float16,0,1.8420719146728515
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,24,1,128,1,float16,fp8,0,1.6437471389770508
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,24,1,128,1,fp8,fp8,0,1.6470399856567384
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,24,2,128,1,float16,fp8,0,1.647420883178711
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,24,2,128,1,float16,float16,0,2.0161760330200194
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,24,2,128,1,fp8,fp8,0,1.6453823089599608
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,24,4,128,1,float16,fp8,0,1.644343948364258
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,24,4,128,1,float16,float16,0,2.3196640014648438
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,24,4,128,1,fp8,fp8,0,1.6432191848754882
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,24,24,128,1,float16,fp8,0,0.9752063751220703
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,24,8,128,1,float16,fp8,0,1.645894432067871
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,24,24,128,1,fp8,fp8,0,1.0326319694519044
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,24,8,128,1,fp8,fp8,0,1.8502815246582032
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,24,1,128,1,float16,float16,0,0.9312784194946289
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,24,1,128,1,float16,fp8,0,0.8349136352539063
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,24,1,128,1,fp8,fp8,0,0.8366448402404785
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,24,8,128,1,float16,float16,0,2.95696964263916
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,24,24,128,1,float16,float16,0,2.750641632080078
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,24,2,128,1,float16,float16,0,1.0204319953918457
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,24,2,128,1,float16,fp8,0,0.8603167533874512
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,24,2,128,1,fp8,fp8,0,0.8785504341125489
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,24,4,128,1,float16,fp8,0,0.8410927772521972
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,24,4,128,1,fp8,fp8,0,0.8398320198059082
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,24,4,128,1,float16,float16,0,1.1729680061340333
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,24,8,128,1,float16,fp8,0,0.8382783889770508
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,24,24,128,1,float16,fp8,0,0.501307201385498
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,24,8,128,1,fp8,fp8,0,0.840401554107666
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,24,24,128,1,fp8,fp8,0,0.5006991863250733
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,24,1,128,1,float16,float16,0,0.48770718574523925
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,24,8,128,1,float16,float16,0,1.4945152282714844
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,24,1,128,1,float16,fp8,0,0.4314576148986816
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,24,24,128,1,float16,float16,0,1.4174240112304688
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,24,1,128,1,fp8,fp8,0,0.43059678077697755
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,24,2,128,1,float16,float16,0,0.5182064056396485
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,24,2,128,1,float16,fp8,0,0.4295055866241455
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,24,2,128,1,fp8,fp8,0,0.43875842094421386
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,24,4,128,1,fp8,fp8,0,0.43167839050292967
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,24,4,128,1,float16,fp8,0,0.43106718063354493
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,24,4,128,1,float16,float16,0,0.5987855911254882
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,24,8,128,1,float16,fp8,0,0.4312592029571533
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,24,24,128,1,float16,float16,0,0.7117119789123535
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,24,8,128,1,fp8,fp8,0,0.4318880081176758
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,24,8,128,1,float16,float16,0,0.7632287979125977
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,24,24,128,1,float16,fp8,0,0.2683808088302612
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,24,24,128,1,fp8,fp8,0,0.26473278999328614
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,24,1,128,1,float16,float16,0,0.25657119750976565
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,24,1,128,1,float16,fp8,0,0.22850399017333983
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,24,1,128,1,fp8,fp8,0,0.2283616065979004
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,24,2,128,1,float16,float16,0,0.27613279819488523
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,24,2,128,1,float16,fp8,0,0.22880160808563232
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,24,2,128,1,fp8,fp8,0,0.22803359031677245
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,24,4,128,1,float16,float16,0,0.31921920776367185
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,24,4,128,1,float16,fp8,0,0.22886080741882325
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,24,4,128,1,fp8,fp8,0,0.22830240726470946
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,24,8,128,1,float16,float16,0,0.39628798961639405
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,24,8,128,1,float16,fp8,0,0.22894079685211183
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,24,8,128,1,fp8,fp8,0,0.22909600734710694
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,24,24,128,1,float16,fp8,0,0.14602080583572388
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,24,24,128,1,float16,float16,0,0.3747983932495117
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,24,24,128,1,fp8,fp8,0,0.14663039445877074
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,24,1,128,1,float16,float16,0,0.14981759786605836
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,24,1,128,1,float16,fp8,0,0.12673280239105225
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,24,1,128,1,fp8,fp8,0,0.1271247982978821
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,24,2,128,1,float16,float16,0,0.15895040035247804
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,24,2,128,1,float16,fp8,0,0.1270591974258423
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,24,2,128,1,fp8,fp8,0,0.12670400142669677
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,24,4,128,1,float16,float16,0,0.1792736053466797
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,24,4,128,1,float16,fp8,0,0.1281183958053589
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,24,4,128,1,fp8,fp8,0,0.1277135968208313
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,24,8,128,1,float16,float16,0,0.21733760833740234
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,24,8,128,1,float16,fp8,0,0.12798240184783935
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,24,8,128,1,fp8,fp8,0,0.12838720083236693
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,24,24,128,1,float16,float16,0,0.20672318935394288
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,24,24,128,1,float16,fp8,0,0.08513280153274536
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,24,24,128,1,fp8,fp8,0,0.08584799766540527
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,24,1,128,1,float16,float16,0,0.08897439837455749
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,24,1,128,1,float16,fp8,0,0.07694560289382935
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,24,1,128,1,fp8,fp8,0,0.07619680166244507
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,24,2,128,1,float16,float16,0,0.09367039799690247
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,24,2,128,1,float16,fp8,0,0.07665759921073914
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,24,2,128,1,fp8,fp8,0,0.07642880082130432
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,24,4,128,1,float16,float16,0,0.09944800138473511
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,24,4,128,1,float16,fp8,0,0.07661920189857482
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,24,4,128,1,fp8,fp8,0,0.07650079727172851
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,24,8,128,1,float16,float16,0,0.11755839586257935
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,24,8,128,1,float16,fp8,0,0.07646080255508422
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,24,8,128,1,fp8,fp8,0,0.07681279778480529
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,24,24,128,1,float16,float16,0,0.10457760095596313
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,24,24,128,1,float16,fp8,0,0.05554080009460449
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,24,24,128,1,fp8,fp8,0,0.05577279925346375
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,24,2,128,1,fp8,fp8,0,0.05146719813346863
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,24,1,128,1,float16,float16,0,0.0636352002620697
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,24,1,128,1,float16,fp8,0,0.051665598154067995
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,24,1,128,1,fp8,fp8,0,0.05151519775390625
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,24,2,128,1,float16,float16,0,0.06368319988250733
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,24,2,128,1,float16,fp8,0,0.05146399736404419
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,24,4,128,1,float16,float16,0,0.06841599941253662
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,24,4,128,1,float16,fp8,0,0.051855999231338504
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,24,4,128,1,fp8,fp8,0,0.051283198595047
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,24,8,128,1,float16,float16,0,0.07465599775314331
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,24,8,128,1,float16,fp8,0,0.05145599842071533
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,24,8,128,1,fp8,fp8,0,0.05114399790763855
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,24,1,128,1,float16,float16,0,1.1600048065185546
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,24,1,128,1,float16,fp8,0,1.0536160469055176
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,24,1,128,1,fp8,fp8,0,1.0547247886657716
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,24,2,128,1,float16,fp8,0,1.0549936294555664
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,24,2,128,1,float16,float16,0,1.2813967704772948
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,24,2,128,1,fp8,fp8,0,1.05413761138916
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,24,4,128,1,fp8,fp8,0,1.053548812866211
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,24,4,128,1,float16,fp8,0,1.0964719772338867
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,24,4,128,1,float16,float16,0,1.519491195678711
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,24,8,128,1,float16,fp8,0,1.0543696403503418
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,24,8,128,1,fp8,fp8,0,1.053279972076416
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,24,24,128,1,float16,fp8,0,0.6425615787506104
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,24,8,128,1,float16,float16,0,2.0047840118408202
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,24,24,128,1,fp8,fp8,0,0.6608528137207031
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,24,1,128,1,float16,float16,0,0.5901855945587158
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,24,1,128,1,float16,fp8,0,0.5372432231903076
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,24,1,128,1,fp8,fp8,0,0.5367008209228515
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,24,24,128,1,float16,float16,0,1.9581792831420899
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,24,2,128,1,float16,float16,0,0.6522992134094239
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,24,2,128,1,float16,fp8,0,0.5371007919311523
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,24,2,128,1,fp8,fp8,0,0.5373600006103516
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,24,4,128,1,float16,fp8,0,0.5378880023956298
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,24,4,128,1,float16,float16,0,0.771726417541504
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,24,4,128,1,fp8,fp8,0,0.5368127822875977
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,24,8,128,1,float16,fp8,0,0.5381008148193359
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,24,8,128,1,fp8,fp8,0,0.5372159957885743
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,24,24,128,1,float16,fp8,0,0.3331360101699829
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,24,24,128,1,fp8,fp8,0,0.333243203163147
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,24,1,128,1,float16,fp8,0,0.2794303894042969
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,24,8,128,1,float16,float16,0,1.01627197265625
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,24,1,128,1,float16,float16,0,0.3130768060684204
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,24,2,128,1,float16,float16,0,0.34247679710388185
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,24,24,128,1,float16,float16,0,0.9941583633422851
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,24,1,128,1,fp8,fp8,0,0.2792704105377197
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,24,2,128,1,float16,fp8,0,0.2800863981246948
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,24,4,128,1,float16,fp8,0,0.2799472093582153
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,24,2,128,1,fp8,fp8,0,0.2798192024230957
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,24,4,128,1,float16,float16,0,0.4023695945739746
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,24,4,128,1,fp8,fp8,0,0.2815327882766724
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,24,8,128,1,float16,fp8,0,0.28039519786834716
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,24,8,128,1,float16,float16,0,0.5220160007476806
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,24,8,128,1,fp8,fp8,0,0.28012158870697024
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,24,1,128,1,fp8,fp8,0,0.1513167977333069
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,24,24,128,1,float16,fp8,0,0.17877600193023682
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,24,2,128,1,float16,float16,0,0.18959039449691772
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,24,24,128,1,float16,float16,0,0.5138448238372803
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,24,24,128,1,fp8,fp8,0,0.17872799634933473
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,24,1,128,1,float16,float16,0,0.17716000080108643
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,24,1,128,1,float16,fp8,0,0.15268479585647582
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,24,2,128,1,float16,fp8,0,0.15114400386810303
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,24,2,128,1,fp8,fp8,0,0.15199359655380248
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,24,4,128,1,float16,float16,0,0.21949119567871095
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,24,4,128,1,float16,fp8,0,0.15155839920043945
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,24,4,128,1,fp8,fp8,0,0.15223679542541504
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,24,8,128,1,float16,fp8,0,0.1525007963180542
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,24,8,128,1,float16,float16,0,0.27773280143737794
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,24,8,128,1,fp8,fp8,0,0.15359359979629517
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,24,24,128,1,float16,float16,0,0.27337920665740967
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,24,24,128,1,float16,fp8,0,0.10120320320129395
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,24,24,128,1,fp8,fp8,0,0.10095039606094361
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,24,1,128,1,float16,float16,0,0.10004639625549316
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,24,1,128,1,float16,fp8,0,0.08626239895820617
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,24,1,128,1,fp8,fp8,0,0.08642399907112122
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,24,2,128,1,float16,float16,0,0.10925600528717042
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,24,2,128,1,float16,fp8,0,0.08716319799423218
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,24,2,128,1,fp8,fp8,0,0.08645280003547669
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,24,4,128,1,float16,float16,0,0.12524800300598143
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,24,4,128,1,float16,fp8,0,0.08624799847602845
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,24,4,128,1,fp8,fp8,0,0.08675360083580017
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,24,8,128,1,float16,float16,0,0.157478404045105
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,24,8,128,1,float16,fp8,0,0.086763197183609
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,24,8,128,1,fp8,fp8,0,0.08656799793243408
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,24,24,128,1,float16,float16,0,0.15287200212478638
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,24,24,128,1,float16,fp8,0,0.06005600094795227
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,24,24,128,1,fp8,fp8,0,0.05962399840354919
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,24,1,128,1,float16,float16,0,0.0659824013710022
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,24,1,128,1,float16,fp8,0,0.05379520058631897
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,24,1,128,1,fp8,fp8,0,0.05397760272026062
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,24,2,128,1,float16,float16,0,0.0699455976486206
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,24,2,128,1,float16,fp8,0,0.05396800041198731
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,24,2,128,1,fp8,fp8,0,0.053839999437332156
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,24,4,128,1,float16,float16,0,0.07508800029754639
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,24,4,128,1,float16,fp8,0,0.053620797395706174
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,24,4,128,1,fp8,fp8,0,0.05378400087356568
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,24,8,128,1,float16,float16,0,0.0840448021888733
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,24,1,128,1,float16,fp8,0,0.03306719958782196
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,24,8,128,1,float16,fp8,0,0.053635197877883914
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,24,8,128,1,fp8,fp8,0,0.053995198011398314
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,24,24,128,1,float16,float16,0,0.07252799868583679
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,24,24,128,1,float16,fp8,0,0.03537440001964569
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,24,24,128,1,fp8,fp8,0,0.03507040143013
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,24,1,128,1,float16,float16,0,0.043243199586868286
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,24,1,128,1,fp8,fp8,0,0.032948800921440126
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,24,2,128,1,float16,float16,0,0.04418720006942749
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,24,2,128,1,float16,fp8,0,0.03306879997253418
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,24,2,128,1,fp8,fp8,0,0.03295519948005676
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,24,4,128,1,float16,float16,0,0.04808639883995056
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,24,4,128,1,float16,fp8,0,0.033022400736808774
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,24,4,128,1,fp8,fp8,0,0.03281759917736053
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,24,8,128,1,float16,float16,0,0.052902400493621826
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,24,8,128,1,float16,fp8,0,0.03293119966983795
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,24,8,128,1,fp8,fp8,0,0.03290719985961914
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,24,1,128,1,float16,float16,0,1.2388671875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,24,1,128,1,float16,fp8,0,1.1500096321105957
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,24,1,128,1,fp8,fp8,0,1.1487855911254883
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,24,2,128,1,float16,float16,0,1.4102879524230958
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,24,2,128,1,float16,fp8,0,1.1497296333312987
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,24,2,128,1,fp8,fp8,0,1.1503408432006836
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,24,4,128,1,float16,fp8,0,1.1807600021362306
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,24,4,128,1,fp8,fp8,0,1.1496447563171386
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,24,4,128,1,float16,float16,0,1.7366287231445312
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,24,8,128,1,fp8,fp8,0,1.1485424041748047
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,24,8,128,1,float16,fp8,0,1.149009609222412
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,24,24,128,1,float16,fp8,0,0.7238719940185547
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,24,1,128,1,float16,float16,0,0.6367424011230469
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,24,24,128,1,fp8,fp8,0,0.7234159946441651
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,24,8,128,1,float16,float16,0,2.3811008453369142
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,24,1,128,1,float16,fp8,0,0.5831120014190674
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,24,1,128,1,fp8,fp8,0,0.5832880020141602
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,24,2,128,1,float16,fp8,0,0.5833648204803467
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,24,2,128,1,fp8,fp8,0,0.5840288162231445
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,24,2,128,1,float16,float16,0,0.7170000076293945
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,24,24,128,1,float16,float16,0,2.454142379760742
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,24,4,128,1,fp8,fp8,0,0.5833856105804444
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,24,4,128,1,float16,float16,0,0.8810895919799805
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,24,4,128,1,float16,fp8,0,0.583403205871582
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,24,8,128,1,float16,fp8,0,0.5851312160491944
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,24,8,128,1,fp8,fp8,0,0.5840847969055176
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,24,24,128,1,float16,fp8,0,0.3711344003677368
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,24,24,128,1,fp8,fp8,0,0.37081279754638674
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,24,8,128,1,float16,float16,0,1.203592014312744
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,24,1,128,1,float16,float16,0,0.3344736099243164
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,24,1,128,1,float16,fp8,0,0.30124640464782715
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,24,1,128,1,fp8,fp8,0,0.30140318870544436
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,24,24,128,1,float16,float16,0,1.2406352043151856
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,24,2,128,1,float16,float16,0,0.37621440887451174
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,24,2,128,1,float16,fp8,0,0.30146400928497313
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,24,2,128,1,fp8,fp8,0,0.30062720775604246
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,24,4,128,1,float16,float16,0,0.4544976234436035
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,24,4,128,1,float16,fp8,0,0.30144639015197755
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,24,4,128,1,fp8,fp8,0,0.30165278911590576
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,24,8,128,1,float16,fp8,0,0.30185120105743407
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,24,1,128,1,float16,float16,0,0.18564480543136597
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,24,8,128,1,fp8,fp8,0,0.3022752046585083
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,24,8,128,1,float16,float16,0,0.6155648231506348
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,24,24,128,1,float16,fp8,0,0.1955839991569519
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,24,24,128,1,fp8,fp8,0,0.1960927963256836
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,24,24,128,1,float16,float16,0,0.6354991912841796
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,24,1,128,1,float16,fp8,0,0.16098400354385375
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,24,1,128,1,fp8,fp8,0,0.1599455952644348
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,24,2,128,1,float16,float16,0,0.206060791015625
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,24,2,128,1,float16,fp8,0,0.16160160303115845
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,24,8,128,1,float16,float16,0,0.3222383975982666
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,24,2,128,1,fp8,fp8,0,0.16035840511322022
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,24,4,128,1,float16,float16,0,0.24480319023132324
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,24,4,128,1,float16,fp8,0,0.160315203666687
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,24,4,128,1,fp8,fp8,0,0.16048480272293092
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,24,8,128,1,float16,fp8,0,0.16138240098953247
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,24,8,128,1,fp8,fp8,0,0.16102240085601807
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,24,24,128,1,float16,fp8,0,0.10836160182952881
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,24,2,128,1,float16,float16,0,0.11860959529876709
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,24,24,128,1,float16,float16,0,0.3327552080154419
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,24,24,128,1,fp8,fp8,0,0.10787520408630372
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,24,1,128,1,float16,float16,0,0.11037919521331788
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,24,1,128,1,float16,fp8,0,0.08910400271415711
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,24,1,128,1,fp8,fp8,0,0.08884639739990234
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,24,2,128,1,float16,fp8,0,0.0891215980052948
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,24,2,128,1,fp8,fp8,0,0.08869919776916504
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,24,4,128,1,float16,float16,0,0.1391711950302124
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,24,4,128,1,float16,fp8,0,0.09005439877510071
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,24,4,128,1,fp8,fp8,0,0.08907039761543274
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,24,8,128,1,float16,float16,0,0.17765120267868043
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,24,8,128,1,float16,fp8,0,0.09010080099105836
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,24,8,128,1,fp8,fp8,0,0.0899071991443634
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,24,24,128,1,float16,float16,0,0.18134720325469972
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,24,24,128,1,float16,fp8,0,0.06206880211830139
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,24,24,128,1,fp8,fp8,0,0.061959999799728396
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,24,1,128,1,float16,float16,0,0.06451839804649354
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,24,1,128,1,float16,fp8,0,0.05355200171470642
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,24,1,128,1,fp8,fp8,0,0.053401601314544675
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,24,2,128,1,float16,float16,0,0.06843680143356323
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,24,2,128,1,float16,fp8,0,0.053540802001953124
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,24,2,128,1,fp8,fp8,0,0.05304319858551025
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,24,4,128,1,float16,float16,0,0.0743120014667511
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,24,4,128,1,float16,fp8,0,0.052964800596237184
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,24,4,128,1,fp8,fp8,0,0.05372959971427917
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,24,8,128,1,float16,float16,0,0.09215199947357178
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,24,8,128,1,float16,fp8,0,0.05362399816513062
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,24,8,128,1,fp8,fp8,0,0.05336639881134033
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,24,24,128,1,float16,float16,0,0.09011359810829163
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,24,24,128,1,float16,fp8,0,0.04078719913959503
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,24,24,128,1,fp8,fp8,0,0.0406576007604599
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,24,1,128,1,float16,float16,0,0.04747360050678253
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,24,1,128,1,float16,fp8,0,0.03703039884567261
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,24,1,128,1,fp8,fp8,0,0.03710080087184906
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,24,2,128,1,float16,float16,0,0.04905920028686524
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,24,2,128,1,float16,fp8,0,0.03659839928150177
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,24,2,128,1,fp8,fp8,0,0.03710559904575348
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,24,4,128,1,float16,float16,0,0.05321120023727417
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,24,4,128,1,float16,fp8,0,0.03702400028705597
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,24,4,128,1,fp8,fp8,0,0.03686720132827759
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,24,24,128,1,fp8,fp8,0,0.02680160105228424
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,24,8,128,1,float16,float16,0,0.05946559906005859
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,24,8,128,1,float16,fp8,0,0.037108799815177916
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,24,8,128,1,fp8,fp8,0,0.03705120086669922
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,24,24,128,1,float16,float16,0,0.05483199954032898
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,24,24,128,1,float16,fp8,0,0.026892799139022826
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,24,1,128,1,float16,float16,0,0.0371071994304657
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,24,1,128,1,float16,fp8,0,0.024823999404907225
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,24,1,128,1,fp8,fp8,0,0.024771200120449068
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,24,2,128,1,float16,float16,0,0.036504000425338745
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,24,2,128,1,float16,fp8,0,0.024852800369262695
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,24,2,128,1,fp8,fp8,0,0.02502560019493103
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,24,4,128,1,float16,float16,0,0.037108799815177916
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,24,4,128,1,float16,fp8,0,0.024872000515460967
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,24,4,128,1,fp8,fp8,0,0.024828800559043886
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,24,8,128,1,float16,float16,0,0.041252800822258
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,24,8,128,1,float16,fp8,0,0.02475679963827133
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,24,8,128,1,fp8,fp8,0,0.024854399263858795
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,24,1,128,1,float16,float16,0,0.9827615737915039
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,24,1,128,1,float16,fp8,0,0.8975855827331543
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,24,1,128,1,fp8,fp8,0,0.8991231918334961
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,24,2,128,1,float16,float16,0,1.1446271896362306
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,24,2,128,1,float16,fp8,0,0.8978896141052246
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,24,2,128,1,fp8,fp8,0,0.8979840278625488
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,24,4,128,1,float16,fp8,0,0.8956463813781739
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,24,4,128,1,fp8,fp8,0,0.8959839820861817
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,24,4,128,1,float16,float16,0,1.4713552474975586
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,24,8,128,1,float16,fp8,0,0.8959039688110352
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,24,8,128,1,fp8,fp8,0,0.8964176177978516
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,24,24,128,1,float16,fp8,0,0.5930143833160401
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,24,24,128,1,fp8,fp8,0,0.5906623840332031
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,24,1,128,1,float16,float16,0,0.5069503784179688
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,24,1,128,1,float16,fp8,0,0.4557663917541504
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,24,1,128,1,fp8,fp8,0,0.45566401481628416
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,24,8,128,1,float16,float16,0,2.104689598083496
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,24,2,128,1,float16,fp8,0,0.45673279762268065
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,24,2,128,1,fp8,fp8,0,0.4565824031829834
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,24,2,128,1,float16,float16,0,0.5868879795074463
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,24,4,128,1,float16,fp8,0,0.4553376197814941
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,24,4,128,1,float16,float16,0,0.7477295875549317
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,24,4,128,1,fp8,fp8,0,0.4558767795562744
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,24,8,128,1,fp8,fp8,0,0.4552127838134766
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,24,24,128,1,float16,float16,0,2.3019775390625
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,24,8,128,1,float16,fp8,0,0.4553520202636719
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,24,24,128,1,float16,fp8,0,0.3035264015197754
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,24,24,128,1,fp8,fp8,0,0.30358879566192626
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,24,8,128,1,float16,float16,0,1.0638400077819825
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,24,1,128,1,float16,float16,0,0.2685215950012207
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,24,1,128,1,float16,fp8,0,0.23564798831939698
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,24,1,128,1,fp8,fp8,0,0.23627519607543945
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,24,24,128,1,float16,float16,0,1.164737606048584
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,24,2,128,1,float16,float16,0,0.30754880905151366
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,24,2,128,1,float16,fp8,0,0.2357072114944458
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,24,2,128,1,fp8,fp8,0,0.23619039058685304
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,24,4,128,1,float16,float16,0,0.3861439943313599
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,24,4,128,1,float16,fp8,0,0.23606879711151124
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,24,24,128,1,float16,fp8,0,0.16047040224075318
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,24,4,128,1,fp8,fp8,0,0.23540000915527343
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,24,8,128,1,float16,fp8,0,0.2365056037902832
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,24,1,128,1,float16,fp8,0,0.1261247992515564
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,24,8,128,1,float16,float16,0,0.5448703765869141
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,24,8,128,1,fp8,fp8,0,0.23618719577789307
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,24,24,128,1,fp8,fp8,0,0.160588800907135
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,24,24,128,1,float16,float16,0,0.5963039875030518
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,24,1,128,1,float16,float16,0,0.15022079944610595
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,24,1,128,1,fp8,fp8,0,0.1254256010055542
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,24,2,128,1,float16,float16,0,0.17084159851074218
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,24,2,128,1,float16,fp8,0,0.12612320184707643
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,24,2,128,1,fp8,fp8,0,0.12588640451431274
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,24,4,128,1,float16,float16,0,0.20784640312194824
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,24,4,128,1,float16,fp8,0,0.1260640025138855
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,24,4,128,1,fp8,fp8,0,0.12635680437088012
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,24,8,128,1,float16,float16,0,0.2861743927001953
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,24,8,128,1,float16,fp8,0,0.12654880285263062
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,24,8,128,1,fp8,fp8,0,0.12720799446105957
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,24,24,128,1,float16,fp8,0,0.08896160125732422
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,24,2,128,1,float16,float16,0,0.09905920028686524
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,24,2,128,1,float16,fp8,0,0.07009440064430236
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,24,24,128,1,float16,float16,0,0.3113312005996704
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,24,24,128,1,fp8,fp8,0,0.0887167990207672
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,24,1,128,1,float16,float16,0,0.09006239771842957
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,24,1,128,1,float16,fp8,0,0.06986560225486756
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,24,1,128,1,fp8,fp8,0,0.06978080272674561
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,24,2,128,1,fp8,fp8,0,0.07010239958763123
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,24,4,128,1,float16,fp8,0,0.0707040011882782
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,24,4,128,1,float16,float16,0,0.11905920505523682
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,24,4,128,1,fp8,fp8,0,0.0700111985206604
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,24,8,128,1,float16,float16,0,0.15751680135726928
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,24,8,128,1,float16,fp8,0,0.07108640074729919
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,24,8,128,1,fp8,fp8,0,0.07102239727973939
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,24,24,128,1,float16,float16,0,0.16834880113601686
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,24,24,128,1,float16,fp8,0,0.05004159808158874
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,24,24,128,1,fp8,fp8,0,0.051051199436187744
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,24,1,128,1,float16,float16,0,0.051630401611328126
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,24,1,128,1,float16,fp8,0,0.04129279851913452
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,24,1,128,1,fp8,fp8,0,0.04120480120182037
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,24,2,128,1,float16,float16,0,0.05636640191078186
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,24,2,128,1,float16,fp8,0,0.04185279905796051
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,24,8,128,1,fp8,fp8,0,0.04120480120182037
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,24,2,128,1,fp8,fp8,0,0.04134719967842102
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,24,4,128,1,float16,float16,0,0.06355519890785218
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,24,4,128,1,float16,fp8,0,0.042135998606681824
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,24,4,128,1,fp8,fp8,0,0.04123679995536804
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,24,8,128,1,float16,float16,0,0.08216959834098816
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,24,8,128,1,float16,fp8,0,0.04126879870891571
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,24,24,128,1,float16,float16,0,0.08234400153160096
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,24,24,128,1,float16,fp8,0,0.03294720053672791
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,24,24,128,1,fp8,fp8,0,0.03303999900817871
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,24,1,128,1,float16,float16,0,0.039113599061965945
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,24,1,128,1,float16,fp8,0,0.02882080078125
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,24,1,128,1,fp8,fp8,0,0.028598400950431823
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,24,2,128,1,float16,float16,0,0.03931680023670196
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,24,2,128,1,float16,fp8,0,0.028889599442481994
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,24,2,128,1,fp8,fp8,0,0.028790399432182312
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,24,4,128,1,float16,float16,0,0.044924798607826236
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,24,4,128,1,float16,fp8,0,0.02905279994010925
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,24,4,128,1,fp8,fp8,0,0.02900159955024719
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,24,8,128,1,float16,float16,0,0.05055040121078491
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,24,8,128,1,float16,fp8,0,0.028832000494003297
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,24,8,128,1,fp8,fp8,0,0.028935998678207397
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,24,24,128,1,float16,float16,0,0.049348801374435425
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,24,24,128,1,float16,fp8,0,0.021694399416446686
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,24,24,128,1,fp8,fp8,0,0.020828799903392793
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,24,1,128,1,float16,float16,0,0.03110400140285492
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,24,1,128,1,float16,fp8,0,0.01880960017442703
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,24,1,128,1,fp8,fp8,0,0.018771199882030486
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,24,2,128,1,float16,float16,0,0.030955201387405394
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,24,2,128,1,float16,fp8,0,0.02054080069065094
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,24,2,128,1,fp8,fp8,0,0.019023999571800232
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,24,8,128,1,fp8,fp8,0,0.020678399503231047
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,24,4,128,1,float16,float16,0,0.03057439923286438
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,24,4,128,1,float16,fp8,0,0.020716799795627593
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,24,4,128,1,fp8,fp8,0,0.018681600689888
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,24,8,128,1,float16,float16,0,0.035087999701499936
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,24,8,128,1,float16,fp8,0,0.01870719939470291
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,24,24,128,1,float16,float16,0,0.03711999952793121
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,24,24,128,1,float16,fp8,0,0.019417600333690645
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,24,24,128,1,fp8,fp8,0,0.01873600035905838
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,24,1,128,1,float16,float16,0,0.029014399647712706
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,24,1,128,1,float16,fp8,0,0.018681600689888
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,24,1,128,1,fp8,fp8,0,0.018615999817848207
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,24,2,128,1,float16,float16,0,0.029014399647712706
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,24,2,128,1,float16,fp8,0,0.018598400056362152
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,24,2,128,1,fp8,fp8,0,0.018590399622917177
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,24,4,128,1,float16,float16,0,0.028951999545097352
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,24,4,128,1,float16,fp8,0,0.018750399351119995
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,24,4,128,1,fp8,fp8,0,0.018673600256443025
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,24,8,128,1,float16,float16,0,0.029417601227760316
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,24,1,128,1,float16,float16,0,0.42893757820129397
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,24,8,128,1,float16,fp8,0,0.01870719939470291
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,24,8,128,1,fp8,fp8,0,0.018592000007629395
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,24,1,128,1,float16,fp8,0,0.38563361167907717
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,24,2,128,1,fp8,fp8,0,0.38496320247650145
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,24,1,128,1,fp8,fp8,0,0.38514559268951415
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,24,2,128,1,float16,float16,0,0.5078847885131836
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,24,2,128,1,float16,fp8,0,0.38465280532836915
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,24,4,128,1,float16,fp8,0,0.38472959995269773
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,24,4,128,1,float16,float16,0,0.6667776107788086
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,24,4,128,1,fp8,fp8,0,0.3849407911300659
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,24,8,128,1,float16,fp8,0,0.3849263906478882
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,24,24,128,1,fp8,fp8,0,0.26640160083770753
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,24,8,128,1,fp8,fp8,0,0.3842560052871704
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,24,24,128,1,float16,fp8,0,0.2647504091262817
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,24,8,128,1,float16,float16,0,0.9857728004455566
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,24,1,128,1,float16,float16,0,0.22830560207366943
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,24,1,128,1,float16,fp8,0,0.19794559478759766
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,24,1,128,1,fp8,fp8,0,0.1978224039077759
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,24,2,128,1,float16,float16,0,0.2668031930923462
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,24,24,128,1,float16,float16,0,1.1211135864257813
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,24,2,128,1,float16,fp8,0,0.19705439805984498
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,24,2,128,1,fp8,fp8,0,0.1976032018661499
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,24,4,128,1,float16,float16,0,0.34509758949279784
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,24,4,128,1,float16,fp8,0,0.19769439697265626
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,24,4,128,1,fp8,fp8,0,0.1982591986656189
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,24,8,128,1,float16,fp8,0,0.198689603805542
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,24,8,128,1,float16,float16,0,0.5021503925323486
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,24,8,128,1,fp8,fp8,0,0.19824800491333008
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,24,24,128,1,float16,fp8,0,0.1409216046333313
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,24,24,128,1,fp8,fp8,0,0.14163199663162232
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,24,1,128,1,float16,float16,0,0.1287071943283081
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,24,24,128,1,float16,float16,0,0.5743616104125977
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,24,4,128,1,float16,float16,0,0.18789279460906982
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,24,4,128,1,float16,fp8,0,0.10691039562225342
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,24,1,128,1,float16,fp8,0,0.10665600299835205
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,24,1,128,1,fp8,fp8,0,0.1068079948425293
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,24,2,128,1,float16,float16,0,0.14952160120010377
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,24,8,128,1,fp8,fp8,0,0.10694080591201782
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,24,2,128,1,float16,fp8,0,0.10650399923324586
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,24,2,128,1,fp8,fp8,0,0.1067855954170227
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,24,4,128,1,fp8,fp8,0,0.10680160522460938
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,24,8,128,1,float16,fp8,0,0.10672800540924073
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,24,8,128,1,float16,float16,0,0.26502399444580077
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,24,24,128,1,float16,fp8,0,0.07945759892463684
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,24,24,128,1,fp8,fp8,0,0.07884479761123657
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,24,24,128,1,float16,float16,0,0.301311993598938
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,24,1,128,1,float16,float16,0,0.08029599785804749
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,24,1,128,1,float16,fp8,0,0.05973759889602661
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,24,1,128,1,fp8,fp8,0,0.05985919833183288
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,24,2,128,1,float16,float16,0,0.0882095992565155
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,24,2,128,1,float16,fp8,0,0.059640002250671384
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,24,2,128,1,fp8,fp8,0,0.059824001789093015
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,24,4,128,1,float16,float16,0,0.10827679634094238
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,24,8,128,1,fp8,fp8,0,0.06127520203590393
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,24,4,128,1,float16,fp8,0,0.059575998783111574
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,24,4,128,1,fp8,fp8,0,0.05990719795227051
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,24,8,128,1,float16,float16,0,0.1470576047897339
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,24,8,128,1,float16,fp8,0,0.061627197265625
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,24,24,128,1,float16,float16,0,0.16199519634246826
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,24,2,128,1,float16,float16,0,0.04941760003566742
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,24,24,128,1,float16,fp8,0,0.04347679913043976
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,24,24,128,1,fp8,fp8,0,0.04325760006904602
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,24,1,128,1,float16,float16,0,0.043782401084899905
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,24,1,128,1,float16,fp8,0,0.035236799716949464
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,24,1,128,1,fp8,fp8,0,0.0352288007736206
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,24,2,128,1,float16,fp8,0,0.03496640026569366
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,24,2,128,1,fp8,fp8,0,0.0350928008556366
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,24,4,128,1,float16,float16,0,0.05579040050506592
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,24,4,128,1,float16,fp8,0,0.035094401240348815
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,24,4,128,1,fp8,fp8,0,0.03493599891662598
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,24,8,128,1,float16,float16,0,0.07601280212402343
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,24,8,128,1,float16,fp8,0,0.03505919873714447
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,24,8,128,1,fp8,fp8,0,0.03495840132236481
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,24,24,128,1,float16,float16,0,0.0805072009563446
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,24,24,128,1,float16,fp8,0,0.029044800996780397
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,24,24,128,1,fp8,fp8,0,0.02885119915008545
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,24,1,128,1,float16,float16,0,0.0350383996963501
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,24,1,128,1,float16,fp8,0,0.024884800612926482
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,24,1,128,1,fp8,fp8,0,0.02481440007686615
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,24,2,128,1,float16,float16,0,0.03504799902439117
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,24,2,128,1,float16,fp8,0,0.02484800070524216
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,24,2,128,1,fp8,fp8,0,0.024743999540805816
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,24,4,128,1,float16,float16,0,0.040427199006080626
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,24,4,128,1,float16,fp8,0,0.025044798851013184
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,24,4,128,1,fp8,fp8,0,0.024726399779319765
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,24,8,128,1,float16,float16,0,0.04561119973659515
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,24,8,128,1,float16,fp8,0,0.024835200607776643
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,24,8,128,1,fp8,fp8,0,0.02473759949207306
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,24,24,128,1,float16,float16,0,0.045296001434326175
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,24,24,128,1,float16,fp8,0,0.018688000738620758
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,24,2,128,1,fp8,fp8,0,0.01652960032224655
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,24,24,128,1,fp8,fp8,0,0.01871519982814789
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,24,1,128,1,float16,float16,0,0.026892799139022826
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,24,4,128,1,fp8,fp8,0,0.016739200055599212
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,24,1,128,1,float16,fp8,0,0.016545599699020384
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,24,1,128,1,fp8,fp8,0,0.01666080057621002
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,24,2,128,1,float16,float16,0,0.026811200380325317
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,24,2,128,1,float16,fp8,0,0.01659359931945801
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,24,24,128,1,float16,fp8,0,0.016596800088882445
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,24,4,128,1,float16,float16,0,0.026897600293159483
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,24,4,128,1,float16,fp8,0,0.017076799273490907
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,24,8,128,1,float16,fp8,0,0.016707199811935424
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,24,1,128,1,fp8,fp8,0,0.015727999806404113
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,24,8,128,1,float16,float16,0,0.03203040063381195
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,24,8,128,1,fp8,fp8,0,0.01656319946050644
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,24,24,128,1,float16,float16,0,0.03499679863452911
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,24,24,128,1,fp8,fp8,0,0.016521599888801575
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,24,4,128,1,float16,fp8,0,0.014564800262451171
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,24,1,128,1,float16,float16,0,0.02685759961605072
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,24,4,128,1,fp8,fp8,0,0.014664000272750855
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,24,1,128,1,float16,fp8,0,0.014697599411010741
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,24,2,128,1,float16,fp8,0,0.01600320041179657
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,24,2,128,1,float16,float16,0,0.026785600185394286
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,24,2,128,1,fp8,fp8,0,0.014691199362277984
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,24,4,128,1,float16,float16,0,0.02686080038547516
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,24,8,128,1,float16,float16,0,0.026782399415969847
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,24,8,128,1,float16,fp8,0,0.015110400319099427
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,24,8,128,1,fp8,fp8,0,0.014591999351978302
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,24,24,128,1,float16,float16,0,0.02882080078125
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,24,24,128,1,float16,fp8,0,0.01570879966020584
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,24,24,128,1,fp8,fp8,0,0.014611199498176575
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,24,1,128,1,float16,float16,0,0.024751999974250795
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,24,1,128,1,float16,fp8,0,0.014563199877738953
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,24,1,128,1,fp8,fp8,0,0.014560000598430633
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,24,2,128,1,float16,float16,0,0.024825599789619446
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,24,2,128,1,float16,fp8,0,0.01454080045223236
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,24,2,128,1,fp8,fp8,0,0.014587199687957764
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,24,4,128,1,float16,float16,0,0.024859200417995452
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,24,4,128,1,float16,fp8,0,0.014500799775123595
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,24,4,128,1,fp8,fp8,0,0.01459839940071106
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,24,8,128,1,float16,float16,0,0.02487040013074875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,24,8,128,1,float16,fp8,0,0.014683200418949128
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,24,8,128,1,fp8,fp8,0,0.014593599736690522
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,24,1,128,1,float16,float16,0,0.2624000072479248
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,24,1,128,1,float16,fp8,0,0.22915520668029785
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,24,1,128,1,fp8,fp8,0,0.22912321090698243
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,24,2,128,1,float16,float16,0,0.3001199960708618
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,24,4,128,1,fp8,fp8,0,0.22870240211486817
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,24,2,128,1,float16,fp8,0,0.22859840393066405
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,24,2,128,1,fp8,fp8,0,0.22905280590057372
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,24,4,128,1,float16,float16,0,0.37706561088562013
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,24,4,128,1,float16,fp8,0,0.2284480094909668
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,24,8,128,1,float16,fp8,0,0.22903039455413818
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,24,8,128,1,float16,float16,0,0.5341472148895263
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,24,8,128,1,fp8,fp8,0,0.22885599136352539
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,24,1,128,1,fp8,fp8,0,0.11940159797668456
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,24,24,128,1,float16,fp8,0,0.15402400493621826
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,24,24,128,1,fp8,fp8,0,0.1539023995399475
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,24,24,128,1,float16,float16,0,0.5868607997894287
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,24,1,128,1,float16,float16,0,0.14417760372161864
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,24,1,128,1,float16,fp8,0,0.1196943998336792
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,24,2,128,1,float16,float16,0,0.1634400010108948
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,24,2,128,1,float16,fp8,0,0.11924639940261841
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,24,2,128,1,fp8,fp8,0,0.11947360038757324
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,24,4,128,1,float16,float16,0,0.20206239223480224
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,24,8,128,1,fp8,fp8,0,0.11956640481948852
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,24,4,128,1,float16,fp8,0,0.11982719898223877
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,24,4,128,1,fp8,fp8,0,0.11951680183410644
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,24,8,128,1,float16,float16,0,0.2777776002883911
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,24,8,128,1,float16,fp8,0,0.11995520591735839
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,24,24,128,1,float16,fp8,0,0.08423039913177491
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,24,24,128,1,float16,float16,0,0.3038912057876587
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,24,24,128,1,fp8,fp8,0,0.08364160060882568
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,24,1,128,1,float16,float16,0,0.08665120005607604
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,24,1,128,1,float16,fp8,0,0.06466079950332641
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,24,1,128,1,fp8,fp8,0,0.06550559997558594
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,24,2,128,1,float16,float16,0,0.09470400214195251
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,24,2,128,1,float16,fp8,0,0.06562399864196777
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,24,2,128,1,fp8,fp8,0,0.06575040221214294
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,24,4,128,1,float16,float16,0,0.11347039937973022
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,24,4,128,1,float16,fp8,0,0.0653007984161377
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,24,4,128,1,fp8,fp8,0,0.0648032009601593
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,24,8,128,1,float16,float16,0,0.15162880420684816
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,24,8,128,1,float16,fp8,0,0.06578559875488281
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,24,8,128,1,fp8,fp8,0,0.06612960100173951
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,24,24,128,1,float16,float16,0,0.1648527979850769
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,24,24,128,1,float16,fp8,0,0.04735200107097626
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,24,24,128,1,fp8,fp8,0,0.047244799137115476
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,24,2,128,1,fp8,fp8,0,0.03718239963054657
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,24,1,128,1,float16,float16,0,0.047947201132774356
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,24,1,128,1,float16,fp8,0,0.03718239963054657
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,24,1,128,1,fp8,fp8,0,0.03739199936389923
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,24,2,128,1,float16,float16,0,0.053324800729751584
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,24,2,128,1,float16,fp8,0,0.037636798620223996
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,24,4,128,1,float16,float16,0,0.06219840049743652
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,24,4,128,1,float16,fp8,0,0.038020798563957216
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,24,4,128,1,fp8,fp8,0,0.037459200620651244
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,24,8,128,1,float16,float16,0,0.07843679785728455
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,24,8,128,1,float16,fp8,0,0.03840000033378601
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,24,8,128,1,fp8,fp8,0,0.0377344012260437
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,24,24,128,1,float16,float16,0,0.07940319776535035
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,24,24,128,1,float16,fp8,0,0.026899200677871705
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,24,24,128,1,fp8,fp8,0,0.026793599128723145
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,24,1,128,1,float16,float16,0,0.03394719958305359
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,24,1,128,1,float16,fp8,0,0.022681599855422972
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,24,1,128,1,fp8,fp8,0,0.02279040068387985
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,24,2,128,1,float16,float16,0,0.03504799902439117
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,24,2,128,1,float16,fp8,0,0.02276639938354492
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,24,2,128,1,fp8,fp8,0,0.02273920029401779
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,24,4,128,1,float16,float16,0,0.039043200016021726
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,24,4,128,1,float16,fp8,0,0.02280319929122925
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,24,4,128,1,fp8,fp8,0,0.022745600342750548
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,24,8,128,1,float16,float16,0,0.04530560076236725
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,24,8,128,1,float16,fp8,0,0.0228752002120018
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,24,8,128,1,fp8,fp8,0,0.022801600396633148
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,24,24,128,1,float16,float16,0,0.046348801255226134
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,24,2,128,1,float16,fp8,0,0.016756799817085267
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,24,24,128,1,float16,fp8,0,0.01866080015897751
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,24,2,128,1,fp8,fp8,0,0.016705599427223206
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,24,24,128,1,fp8,fp8,0,0.018700799345970152
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,24,1,128,1,float16,float16,0,0.027193599939346315
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,24,1,128,1,float16,fp8,0,0.016630400717258454
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,24,1,128,1,fp8,fp8,0,0.01664000004529953
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,24,2,128,1,float16,float16,0,0.02884640097618103
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,24,4,128,1,float16,float16,0,0.028911998867988585
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,24,4,128,1,float16,fp8,0,0.01661760061979294
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,24,4,128,1,fp8,fp8,0,0.016707199811935424
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,24,8,128,1,float16,float16,0,0.03328480124473572
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,24,8,128,1,float16,fp8,0,0.01668799966573715
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,24,8,128,1,fp8,fp8,0,0.016700799763202667
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,24,24,128,1,float16,float16,0,0.031188800930976868
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,24,24,128,1,float16,fp8,0,0.01250240057706833
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,24,24,128,1,fp8,fp8,0,0.01276479959487915
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,24,1,128,1,float16,float16,0,0.02267040014266968
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,24,1,128,1,float16,fp8,0,0.012457600235939026
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,24,1,128,1,fp8,fp8,0,0.0124719999730587
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,24,2,128,1,float16,float16,0,0.022705599665641785
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,24,2,128,1,float16,fp8,0,0.012411200255155564
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,24,8,128,1,fp8,fp8,0,0.012465599924325943
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,24,2,128,1,fp8,fp8,0,0.012457600235939026
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,24,4,128,1,float16,float16,0,0.022703999280929567
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,24,4,128,1,float16,fp8,0,0.012567999958992004
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,24,4,128,1,fp8,fp8,0,0.012454400211572647
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,24,8,128,1,float16,float16,0,0.02279680073261261
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,24,8,128,1,float16,fp8,0,0.012432000041007996
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,24,24,128,1,float16,float16,0,0.022814400494098663
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,24,24,128,1,float16,fp8,0,0.012478400021791458
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,24,24,128,1,fp8,fp8,0,0.01244800016283989
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,24,1,128,1,float16,float16,0,0.02075359970331192
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,24,1,128,1,float16,fp8,0,0.010486400127410889
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,24,1,128,1,fp8,fp8,0,0.011444800347089768
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,24,2,128,1,float16,float16,0,0.020873600244522096
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,24,2,128,1,float16,fp8,0,0.010606399923563003
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,24,2,128,1,fp8,fp8,0,0.010556799918413162
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,24,4,128,1,float16,float16,0,0.020769600570201874
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,24,4,128,1,float16,fp8,0,0.010599999874830245
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,24,4,128,1,fp8,fp8,0,0.01225920021533966
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,24,8,128,1,float16,float16,0,0.02086720019578934
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,24,8,128,1,float16,fp8,0,0.012145599722862244
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,24,8,128,1,fp8,fp8,0,0.010500799864530563
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,24,24,128,1,float16,float16,0,0.020744000375270844
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,24,24,128,1,float16,fp8,0,0.011086399853229522
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,24,24,128,1,fp8,fp8,0,0.01051200032234192
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,24,1,128,1,float16,float16,0,0.02083519995212555
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,24,1,128,1,float16,fp8,0,0.010700800269842149
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,24,4,128,1,float16,fp8,0,0.010886400192975997
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,24,1,128,1,fp8,fp8,0,0.010793600231409073
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,24,2,128,1,float16,float16,0,0.020710399746894835
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,24,2,128,1,float16,fp8,0,0.010596799850463866
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,24,2,128,1,fp8,fp8,0,0.010574399679899215
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,24,4,128,1,float16,float16,0,0.020670400559902193
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,24,4,128,1,fp8,fp8,0,0.011423999816179276
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,24,8,128,1,float16,float16,0,0.020585599541664123
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,24,8,128,1,float16,fp8,0,0.01048320010304451
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,24,8,128,1,fp8,fp8,0,0.01077599972486496
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,24,1,128,1,float16,float16,0,0.2025696039199829
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,24,1,128,1,float16,fp8,0,0.17098560333251953
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,24,1,128,1,fp8,fp8,0,0.17147519588470458
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,24,2,128,1,float16,float16,0,0.22019679546356202
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,24,2,128,1,float16,fp8,0,0.17172640562057495
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,24,2,128,1,fp8,fp8,0,0.17086559534072876
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,24,4,128,1,float16,float16,0,0.2583184003829956
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,24,4,128,1,float16,fp8,0,0.1723744034767151
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,24,4,128,1,fp8,fp8,0,0.17110879421234132
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,24,8,128,1,float16,float16,0,0.3350800037384033
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,24,8,128,1,float16,fp8,0,0.1712496042251587
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,24,1,128,1,float16,float16,0,0.11468960046768188
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,24,8,128,1,fp8,fp8,0,0.1716480016708374
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,24,24,128,1,float16,fp8,0,0.10819040536880493
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,24,24,128,1,float16,float16,0,0.3325711965560913
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,24,24,128,1,fp8,fp8,0,0.10879520177841187
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,24,1,128,1,float16,fp8,0,0.0900048017501831
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,24,4,128,1,float16,float16,0,0.1418992042541504
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,24,1,128,1,fp8,fp8,0,0.08930559754371643
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,24,2,128,1,float16,float16,0,0.12291040420532226
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,24,2,128,1,float16,fp8,0,0.09092000126838684
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,24,8,128,1,fp8,fp8,0,0.09112319946289063
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,24,2,128,1,fp8,fp8,0,0.09001119732856751
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,24,4,128,1,float16,fp8,0,0.09119359850883484
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,24,4,128,1,fp8,fp8,0,0.09100639820098877
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,24,8,128,1,float16,fp8,0,0.09099199771881103
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,24,8,128,1,float16,float16,0,0.18020800352096558
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,24,24,128,1,float16,float16,0,0.17724640369415284
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,24,24,128,1,float16,fp8,0,0.05947679877281189
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,24,24,128,1,fp8,fp8,0,0.0593936026096344
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,24,1,128,1,float16,float16,0,0.0618336021900177
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,24,1,128,1,float16,fp8,0,0.04958080053329468
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,24,1,128,1,fp8,fp8,0,0.04951840043067932
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,24,2,128,1,float16,float16,0,0.06737120151519775
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,24,8,128,1,float16,float16,0,0.09484639763832092
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,24,2,128,1,float16,fp8,0,0.05009440183639526
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,24,2,128,1,fp8,fp8,0,0.04981440007686615
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,24,4,128,1,float16,float16,0,0.0745855987071991
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,24,4,128,1,float16,fp8,0,0.04953599870204926
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,24,4,128,1,fp8,fp8,0,0.05033439993858337
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,24,8,128,1,float16,fp8,0,0.05025759935379028
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,24,8,128,1,fp8,fp8,0,0.049609598517417905
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,24,24,128,1,float16,float16,0,0.08608319759368896
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,24,24,128,1,float16,fp8,0,0.03503519892692566
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,24,24,128,1,fp8,fp8,0,0.03495039939880371
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,24,1,128,1,float16,float16,0,0.04229280054569244
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,24,1,128,1,float16,fp8,0,0.029660800099372865
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,24,1,128,1,fp8,fp8,0,0.030619201064109803
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,24,2,128,1,float16,float16,0,0.042263999581336975
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,24,2,128,1,float16,fp8,0,0.030003198981285097
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,24,2,128,1,fp8,fp8,0,0.031065601110458373
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,24,4,128,1,float16,float16,0,0.04724000096321106
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,24,4,128,1,float16,fp8,0,0.030844798684120177
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,24,4,128,1,fp8,fp8,0,0.030640000104904176
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,24,8,128,1,float16,float16,0,0.05306239724159241
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,24,1,128,1,float16,fp8,0,0.018779200315475465
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,24,8,128,1,float16,fp8,0,0.03097119927406311
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,24,1,128,1,fp8,fp8,0,0.01867839992046356
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,24,8,128,1,fp8,fp8,0,0.030979201197624207
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,24,24,128,1,float16,float16,0,0.049430400133132935
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,24,24,128,1,float16,fp8,0,0.020751999318599702
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,24,24,128,1,fp8,fp8,0,0.020761600136756896
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,24,1,128,1,float16,float16,0,0.03094240128993988
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,24,2,128,1,float16,float16,0,0.030870398879051207
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,24,2,128,1,float16,fp8,0,0.01875839978456497
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,24,2,128,1,fp8,fp8,0,0.018691200017929076
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,24,4,128,1,float16,float16,0,0.03089759945869446
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,24,24,128,1,float16,fp8,0,0.016422399878501893
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,24,4,128,1,float16,fp8,0,0.018641600012779237
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,24,1,128,1,float16,float16,0,0.02683199942111969
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,24,4,128,1,fp8,fp8,0,0.018697600066661834
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,24,8,128,1,float16,float16,0,0.035071998834609985
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,24,8,128,1,float16,fp8,0,0.018963199853897095
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,24,8,128,1,fp8,fp8,0,0.018667200207710268
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,24,24,128,1,float16,float16,0,0.03497920036315918
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,24,24,128,1,fp8,fp8,0,0.015345600247383118
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,24,1,128,1,float16,fp8,0,0.014604799449443817
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,24,1,128,1,fp8,fp8,0,0.0146479994058609
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,24,2,128,1,float16,float16,0,0.02492000013589859
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,24,8,128,1,float16,fp8,0,0.014511999487876893
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,24,2,128,1,float16,fp8,0,0.014608000218868256
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,24,2,128,1,fp8,fp8,0,0.014703999459743499
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,24,4,128,1,float16,float16,0,0.026675200462341307
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,24,4,128,1,float16,fp8,0,0.014553600549697876
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,24,4,128,1,fp8,fp8,0,0.014556799829006196
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,24,8,128,1,float16,float16,0,0.026742398738861084
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,24,8,128,1,fp8,fp8,0,0.014596800506114959
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,24,24,128,1,float16,fp8,0,0.01043360009789467
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,24,24,128,1,float16,float16,0,0.024854399263858795
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,24,24,128,1,fp8,fp8,0,0.010480000078678131
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,24,1,128,1,float16,float16,0,0.020828799903392793
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,24,1,128,1,float16,fp8,0,0.010451199859380722
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,24,1,128,1,fp8,fp8,0,0.01043199971318245
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,24,2,128,1,float16,float16,0,0.02064319998025894
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,24,2,128,1,float16,fp8,0,0.010496000200510025
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,24,2,128,1,fp8,fp8,0,0.010487999767065048
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,24,4,128,1,float16,float16,0,0.020640000700950623
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,24,4,128,1,float16,fp8,0,0.010571199655532836
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,24,4,128,1,fp8,fp8,0,0.010446400195360184
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,24,8,128,1,float16,float16,0,0.02065120041370392
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,24,8,128,1,float16,fp8,0,0.010592000186443329
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,24,8,128,1,fp8,fp8,0,0.0105103999376297
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,24,24,128,1,float16,float16,0,0.020628799498081208
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,24,24,128,1,float16,fp8,0,0.010577599704265594
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,24,24,128,1,fp8,fp8,0,0.010572800040245056
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,24,1,128,1,float16,float16,0,0.019350400567054747
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,24,1,128,1,float16,fp8,0,0.01069760024547577
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,24,2,128,1,float16,float16,0,0.02069920003414154
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,24,1,128,1,fp8,fp8,0,0.010388799756765366
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,24,8,128,1,float16,float16,0,0.019070400297641753
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,24,2,128,1,float16,fp8,0,0.010363200306892395
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,24,2,128,1,fp8,fp8,0,0.010497599840164185
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,24,4,128,1,float16,float16,0,0.01926079988479614
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,24,4,128,1,float16,fp8,0,0.010416000336408614
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,24,4,128,1,fp8,fp8,0,0.01040479987859726
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,24,1,128,1,float16,float16,0,0.01860959976911545
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,24,8,128,1,float16,fp8,0,0.0104032002389431
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,24,8,128,1,fp8,fp8,0,0.010463999956846238
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,24,24,128,1,float16,float16,0,0.01868479996919632
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,24,24,128,1,float16,fp8,0,0.0104032002389431
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,24,24,128,1,fp8,fp8,0,0.010363200306892395
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,24,1,128,1,float16,fp8,0,0.010545600205659866
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,24,1,128,1,fp8,fp8,0,0.010375999659299851
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,24,2,128,1,float16,float16,0,0.018593600392341612
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,24,2,128,1,float16,fp8,0,0.010449600219726563
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,24,2,128,1,fp8,fp8,0,0.010320000350475311
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,24,4,128,1,float16,float16,0,0.01876319944858551
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,24,1,128,1,float16,float16,0,0.17692320346832274
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,24,4,128,1,float16,fp8,0,0.010377600044012069
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,24,4,128,1,fp8,fp8,0,0.010585600137710571
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,24,8,128,1,float16,float16,0,0.01876640021800995
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,24,8,128,1,float16,fp8,0,0.010531199723482132
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,24,8,128,1,fp8,fp8,0,0.01050880029797554
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,24,1,128,1,float16,fp8,0,0.14602880477905272
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,24,1,128,1,fp8,fp8,0,0.14640159606933595
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,24,4,128,1,fp8,fp8,0,0.1478000044822693
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,24,2,128,1,float16,float16,0,0.18511359691619872
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,24,2,128,1,float16,fp8,0,0.14618879556655884
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,24,2,128,1,fp8,fp8,0,0.14631199836730957
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,24,4,128,1,float16,float16,0,0.20603039264678955
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,24,4,128,1,float16,fp8,0,0.14644800424575805
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,24,8,128,1,float16,float16,0,0.2421247959136963
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,24,8,128,1,float16,fp8,0,0.14702399969100952
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,24,8,128,1,fp8,fp8,0,0.14829920530319213
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,24,24,128,1,float16,float16,0,0.2095168113708496
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,24,24,128,1,float16,fp8,0,0.08605120182037354
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,24,24,128,1,fp8,fp8,0,0.08627840280532836
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,24,1,128,1,float16,float16,0,0.095169597864151
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,24,1,128,1,float16,fp8,0,0.07736960053443909
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,24,1,128,1,fp8,fp8,0,0.07724000215530395
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,24,2,128,1,float16,float16,0,0.09936800003051757
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,24,2,128,1,float16,fp8,0,0.07787520289421082
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,24,2,128,1,fp8,fp8,0,0.07693279981613159
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,24,4,128,1,float16,float16,0,0.10688480138778686
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,24,4,128,1,float16,fp8,0,0.07697280049324036
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,24,4,128,1,fp8,fp8,0,0.07772639989852906
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,24,8,128,1,float16,float16,0,0.12895840406417847
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,24,8,128,1,float16,fp8,0,0.07749119997024537
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,24,8,128,1,fp8,fp8,0,0.07743200063705444
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,24,24,128,1,float16,float16,0,0.10103839635848999
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,24,24,128,1,float16,fp8,0,0.0473471999168396
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,24,24,128,1,fp8,fp8,0,0.04742079973220825
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,24,1,128,1,float16,float16,0,0.057708799839019775
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,24,1,128,1,float16,fp8,0,0.043438398838043214
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,24,1,128,1,fp8,fp8,0,0.043412798643112184
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,24,2,128,1,float16,float16,0,0.05791040062904358
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,24,2,128,1,float16,fp8,0,0.043556800484657286
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,24,2,128,1,fp8,fp8,0,0.04338879883289337
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,24,4,128,1,float16,float16,0,0.06222079992294312
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,24,4,128,1,float16,fp8,0,0.04327679872512817
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,24,4,128,1,fp8,fp8,0,0.04356000125408173
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,24,8,128,1,float16,float16,0,0.06830239892005921
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,24,8,128,1,float16,fp8,0,0.04355039894580841
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,24,8,128,1,fp8,fp8,0,0.04328320026397705
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,24,24,128,1,float16,float16,0,0.05775200128555298
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,24,24,128,1,float16,fp8,0,0.02890079915523529
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,24,24,128,1,fp8,fp8,0,0.02887200117111206
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,24,1,128,1,float16,float16,0,0.0392304003238678
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,24,1,128,1,float16,fp8,0,0.027239999175071715
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,24,1,128,1,fp8,fp8,0,0.026868799328804018
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,24,2,128,1,float16,float16,0,0.03917919993400574
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,24,2,128,1,float16,fp8,0,0.026785600185394286
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,24,2,128,1,fp8,fp8,0,0.026958400011062623
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,24,4,128,1,float16,float16,0,0.039715200662612915
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,24,4,128,1,float16,fp8,0,0.026913601160049438
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,24,4,128,1,fp8,fp8,0,0.026787200570106508
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,24,24,128,1,fp8,fp8,0,0.018731200695037843
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,24,8,128,1,float16,float16,0,0.0450080007314682
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,24,8,128,1,float16,fp8,0,0.02688960134983063
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,24,8,128,1,fp8,fp8,0,0.027190399169921876
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,24,24,128,1,float16,float16,0,0.0390720009803772
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,24,24,128,1,float16,fp8,0,0.018667200207710268
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,24,1,128,1,float16,float16,0,0.028896000981330872
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,24,1,128,1,float16,fp8,0,0.016708800196647645
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,24,1,128,1,fp8,fp8,0,0.01666239947080612
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,24,2,128,1,float16,float16,0,0.028966400027275085
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,24,2,128,1,float16,fp8,0,0.016740800440311433
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,24,2,128,1,fp8,fp8,0,0.01659200042486191
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,24,4,128,1,float16,float16,0,0.02892000079154968
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,24,4,128,1,float16,fp8,0,0.016620799899101257
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,24,4,128,1,fp8,fp8,0,0.016808000206947327
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,24,8,128,1,float16,float16,0,0.02884640097618103
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,24,8,128,1,float16,fp8,0,0.016840000450611115
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,24,8,128,1,fp8,fp8,0,0.016638399660587312
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,24,24,128,1,float16,float16,0,0.02887200117111206
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,24,24,128,1,float16,fp8,0,0.014532800018787383
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,24,24,128,1,fp8,fp8,0,0.0144896000623703
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,24,1,128,1,float16,float16,0,0.023838399350643157
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,24,1,128,1,float16,fp8,0,0.012505599856376648
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,24,1,128,1,fp8,fp8,0,0.013790400326251983
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,24,2,128,1,float16,float16,0,0.02290080040693283
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,24,2,128,1,fp8,fp8,0,0.012604799866676331
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,24,2,128,1,float16,fp8,0,0.012516799569129943
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,24,4,128,1,float16,float16,0,0.024846400320529937
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,24,4,128,1,float16,fp8,0,0.013497599959373474
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,24,4,128,1,fp8,fp8,0,0.013340799510478974
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,24,8,128,1,float16,float16,0,0.02478239983320236
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,24,8,128,1,float16,fp8,0,0.012574400007724761
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,24,8,128,1,fp8,fp8,0,0.012828800082206725
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,24,24,128,1,float16,float16,0,0.020588800311088562
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,24,24,128,1,float16,fp8,0,0.010465600341558457
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,24,24,128,1,fp8,fp8,0,0.010531199723482132
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,24,1,128,1,float16,float16,0,0.01858240067958832
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,24,1,128,1,float16,fp8,0,0.010395199805498124
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,24,1,128,1,fp8,fp8,0,0.010460799932479859
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,24,2,128,1,float16,float16,0,0.018695999681949616
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,24,2,128,1,float16,fp8,0,0.01040479987859726
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,24,2,128,1,fp8,fp8,0,0.010422399640083313
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,24,4,128,1,float16,float16,0,0.020476800203323365
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,24,4,128,1,float16,fp8,0,0.010489600151777268
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,24,4,128,1,fp8,fp8,0,0.010395199805498124
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,24,1,128,1,float16,float16,0,0.01865279972553253
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,24,8,128,1,float16,fp8,0,0.010443200170993806
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,24,8,128,1,float16,float16,0,0.02065120041370392
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,24,8,128,1,fp8,fp8,0,0.010412800312042236
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,24,24,128,1,float16,float16,0,0.018559999763965607
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,24,24,128,1,float16,fp8,0,0.010494399815797806
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,24,24,128,1,fp8,fp8,0,0.010377600044012069
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,24,1,128,1,float16,fp8,0,0.009387200325727462
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,24,1,128,1,fp8,fp8,0,0.010505600273609162
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,24,2,128,1,float16,float16,0,0.01865919977426529
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,24,2,128,1,float16,fp8,0,0.010411199927330018
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,24,2,128,1,fp8,fp8,0,0.010422399640083313
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,24,4,128,1,float16,fp8,0,0.010358399897813796
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,24,4,128,1,float16,float16,0,0.018691200017929076
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,24,24,128,1,fp8,fp8,0,0.010391999781131745
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,24,4,128,1,fp8,fp8,0,0.010375999659299851
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,24,8,128,1,float16,float16,0,0.018614399433135986
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,24,8,128,1,float16,fp8,0,0.010360000282526016
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,24,2,128,1,float16,float16,0,0.018617600202560425
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,24,8,128,1,fp8,fp8,0,0.010476800054311753
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,24,24,128,1,float16,float16,0,0.018595199286937713
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,24,24,128,1,float16,fp8,0,0.010407999902963639
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,24,1,128,1,float16,float16,0,0.018742400407791137
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,24,1,128,1,float16,fp8,0,0.00883520022034645
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,24,1,128,1,fp8,fp8,0,0.009379199892282485
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,24,2,128,1,float16,fp8,0,0.010401599854230881
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,24,2,128,1,fp8,fp8,0,0.010204800218343735
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,24,4,128,1,float16,float16,0,0.018699200451374055
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,24,4,128,1,float16,fp8,0,0.00928959995508194
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,24,4,128,1,fp8,fp8,0,0.008454400300979614
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,24,8,128,1,float16,float16,0,0.018628799915313722
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,24,8,128,1,float16,fp8,0,0.00958240032196045
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,24,8,128,1,fp8,fp8,0,0.009884800016880035
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,24,1,128,1,float16,float16,0,0.16801919937133789
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,24,1,128,1,float16,fp8,0,0.1341007947921753
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,24,1,128,1,fp8,fp8,0,0.13313599824905395
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,24,2,128,1,float16,float16,0,0.1706272006034851
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,24,2,128,1,float16,fp8,0,0.13236160278320314
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,24,2,128,1,fp8,fp8,0,0.13222719430923463
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,24,8,128,1,fp8,fp8,0,0.1325584053993225
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,24,4,128,1,float16,float16,0,0.17870080471038818
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,24,4,128,1,float16,fp8,0,0.13229119777679443
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,24,4,128,1,fp8,fp8,0,0.13213440179824829
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,24,8,128,1,float16,float16,0,0.2067023992538452
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,24,8,128,1,float16,fp8,0,0.13289120197296142
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,24,24,128,1,float16,float16,0,0.14163199663162232
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,24,24,128,1,float16,fp8,0,0.07528640031814575
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,24,24,128,1,fp8,fp8,0,0.07539839744567871
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,24,1,128,1,float16,float16,0,0.09375039935111999
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,24,1,128,1,float16,fp8,0,0.07099999785423279
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,24,1,128,1,fp8,fp8,0,0.07106080055236816
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,24,2,128,1,float16,float16,0,0.09543520212173462
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,24,8,128,1,float16,float16,0,0.10533759593963624
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,24,2,128,1,float16,fp8,0,0.07125279903411866
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,24,2,128,1,fp8,fp8,0,0.07186239957809448
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,24,4,128,1,float16,float16,0,0.09959200024604797
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,24,4,128,1,float16,fp8,0,0.07142879962921142
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,24,4,128,1,fp8,fp8,0,0.07132959961891175
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,24,8,128,1,float16,fp8,0,0.07153760194778443
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,24,8,128,1,fp8,fp8,0,0.07164480090141297
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,24,24,128,1,float16,float16,0,0.07575039863586426
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,24,24,128,1,float16,fp8,0,0.0432096004486084
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,24,24,128,1,fp8,fp8,0,0.04328800141811371
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,24,1,128,1,float16,float16,0,0.05784000158309936
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,24,4,128,1,float16,float16,0,0.05762240290641785
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,24,1,128,1,float16,fp8,0,0.04116320013999939
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,24,1,128,1,fp8,fp8,0,0.04054400026798248
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,24,2,128,1,float16,float16,0,0.05766559839248657
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,24,2,128,1,float16,fp8,0,0.04087840020656586
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,24,2,128,1,fp8,fp8,0,0.04105280041694641
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,24,4,128,1,float16,fp8,0,0.041171199083328246
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,24,4,128,1,fp8,fp8,0,0.04102399945259094
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,24,8,128,1,float16,float16,0,0.06304479837417602
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,24,8,128,1,float16,fp8,0,0.04084959924221039
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,24,8,128,1,fp8,fp8,0,0.041168001294136045
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,24,24,128,1,float16,float16,0,0.04745280146598816
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,24,24,128,1,float16,fp8,0,0.026998400688171387
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,24,24,128,1,fp8,fp8,0,0.02653760015964508
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,24,1,128,1,float16,float16,0,0.03912160098552704
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,24,1,128,1,float16,fp8,0,0.024931199848651886
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,24,1,128,1,fp8,fp8,0,0.024780799448490144
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,24,2,128,1,float16,float16,0,0.039263999462127684
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,24,8,128,1,float16,float16,0,0.03927839994430542
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,24,2,128,1,float16,fp8,0,0.024875199794769286
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,24,2,128,1,fp8,fp8,0,0.02495039999485016
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,24,4,128,1,float16,float16,0,0.03907679915428162
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,24,4,128,1,float16,fp8,0,0.024796800315380098
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,24,4,128,1,fp8,fp8,0,0.024860799312591553
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,24,8,128,1,float16,fp8,0,0.02495039999485016
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,24,8,128,1,fp8,fp8,0,0.024958400428295134
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,24,24,128,1,float16,float16,0,0.031401601433753965
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,24,2,128,1,float16,fp8,0,0.016663999855518342
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,24,24,128,1,float16,fp8,0,0.016763199865818024
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,24,24,128,1,fp8,fp8,0,0.016675199568271636
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,24,1,128,1,float16,float16,0,0.026843199133872987
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,24,1,128,1,float16,fp8,0,0.01652960032224655
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,24,1,128,1,fp8,fp8,0,0.016708800196647645
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,24,2,128,1,float16,float16,0,0.02707839906215668
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,24,2,128,1,fp8,fp8,0,0.016577599942684172
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,24,4,128,1,float16,float16,0,0.027689599990844728
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,24,4,128,1,float16,fp8,0,0.016700799763202667
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,24,4,128,1,fp8,fp8,0,0.0165120005607605
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,24,8,128,1,float16,float16,0,0.02757279872894287
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,24,1,128,1,float16,fp8,0,0.012625600397586822
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,24,8,128,1,float16,fp8,0,0.01655679941177368
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,24,24,128,1,float16,float16,0,0.022711999714374542
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,24,8,128,1,fp8,fp8,0,0.016527999937534333
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,24,24,128,1,float16,fp8,0,0.012886400520801543
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,24,24,128,1,fp8,fp8,0,0.012516799569129943
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,24,1,128,1,float16,float16,0,0.022676800191402436
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,24,1,128,1,fp8,fp8,0,0.012689599394798278
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,24,2,128,1,float16,float16,0,0.022763200104236603
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,24,2,128,1,float16,fp8,0,0.012532800436019897
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,24,2,128,1,fp8,fp8,0,0.012615999579429627
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,24,4,128,1,float16,float16,0,0.022836799919605254
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,24,4,128,1,float16,fp8,0,0.012563200294971466
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,24,24,128,1,fp8,fp8,0,0.01034879982471466
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,24,4,128,1,fp8,fp8,0,0.012745599448680877
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,24,8,128,1,float16,float16,0,0.02269600033760071
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,24,8,128,1,float16,fp8,0,0.012479999661445617
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,24,8,128,1,fp8,fp8,0,0.01249919980764389
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,24,24,128,1,float16,float16,0,0.01865919977426529
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,24,24,128,1,float16,fp8,0,0.010406400263309478
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,24,1,128,1,float16,float16,0,0.018691200017929076
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,24,1,128,1,float16,fp8,0,0.010375999659299851
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,24,1,128,1,fp8,fp8,0,0.010572800040245056
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,24,2,128,1,float16,float16,0,0.01860159933567047
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,24,2,128,1,fp8,fp8,0,0.010375999659299851
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,24,2,128,1,float16,fp8,0,0.01040000021457672
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,24,4,128,1,float16,float16,0,0.018747200071811677
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,24,4,128,1,float16,fp8,0,0.01053759977221489
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,24,4,128,1,fp8,fp8,0,0.010380800068378448
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,24,8,128,1,float16,float16,0,0.018641600012779237
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,24,8,128,1,float16,fp8,0,0.010463999956846238
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,24,8,128,1,fp8,fp8,0,0.010427200049161912
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,24,24,128,1,float16,float16,0,0.018662400543689728
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,24,24,128,1,float16,fp8,0,0.01040479987859726
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,24,24,128,1,fp8,fp8,0,0.008966399729251862
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,24,1,128,1,float16,float16,0,0.016654400527477263
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,24,1,128,1,float16,fp8,0,0.008476799726486206
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,24,1,128,1,fp8,fp8,0,0.010372799634933472
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,24,2,128,1,float16,float16,0,0.018671999871730804
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,24,2,128,1,float16,fp8,0,0.009699200093746186
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,24,2,128,1,fp8,fp8,0,0.010487999767065048
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,24,4,128,1,float16,float16,0,0.01871040016412735
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,24,4,128,1,float16,fp8,0,0.00854559987783432
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,24,4,128,1,fp8,fp8,0,0.008470399677753449
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,24,8,128,1,float16,float16,0,0.018812799453735353
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,24,8,128,1,float16,fp8,0,0.010351999849081039
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,24,1,128,1,float16,fp8,0,0.008454400300979614
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,24,1,128,1,fp8,fp8,0,0.008975999802350998
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,24,8,128,1,fp8,fp8,0,0.00933919996023178
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,24,24,128,1,float16,float16,0,0.018649600446224213
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,24,24,128,1,float16,fp8,0,0.010384000092744827
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,24,24,128,1,fp8,fp8,0,0.010371199995279311
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,24,1,128,1,float16,float16,0,0.016678400337696075
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,24,2,128,1,float16,float16,0,0.018662400543689728
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,24,2,128,1,float16,fp8,0,0.008472000062465668
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,24,2,128,1,fp8,fp8,0,0.008417599648237229
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,24,4,128,1,float16,float16,0,0.01865600049495697
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,24,4,128,1,float16,fp8,0,0.009961599856615067
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,24,4,128,1,fp8,fp8,0,0.010414399951696397
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,24,8,128,1,float16,float16,0,0.018620799481868743
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,24,8,128,1,float16,fp8,0,0.008795200288295746
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,24,8,128,1,fp8,fp8,0,0.008454400300979614
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,16,1,128,1,float16,fp8,0,5.656003189086914
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,16,1,128,1,fp8,fp8,0,5.711446380615234
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,16,2,128,1,float16,fp8,0,5.673339080810547
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,16,2,128,1,fp8,fp8,0,5.690974426269531
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,16,1,128,1,float16,float16,0,7.361064147949219
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,16,2,128,1,float16,float16,0,7.318408203125
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,16,4,128,1,float16,float16,0,7.625904083251953
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,16,4,128,1,float16,fp8,0,5.766056060791016
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,16,4,128,1,fp8,fp8,0,5.795064163208008
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,16,16,128,1,float16,float16,0,4.909364700317383
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,16,8,128,1,float16,fp8,0,5.811624145507812
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,16,8,128,1,fp8,fp8,0,5.7611854553222654
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,16,16,128,1,float16,fp8,0,2.9762496948242188
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,16,1,128,1,float16,float16,0,3.5590255737304686
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,16,16,128,1,fp8,fp8,0,3.065809631347656
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,16,8,128,1,float16,float16,0,8.1148193359375
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,16,1,128,1,float16,fp8,0,2.939107131958008
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,16,1,128,1,fp8,fp8,0,3.098923110961914
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,16,2,128,1,float16,fp8,0,2.8882240295410155
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,16,2,128,1,float16,float16,0,3.626831817626953
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,16,2,128,1,fp8,fp8,0,2.8973152160644533
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,16,4,128,1,float16,fp8,0,2.899715232849121
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,16,4,128,1,fp8,fp8,0,3.1124240875244142
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,16,4,128,1,float16,float16,0,3.879817581176758
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,16,16,128,1,float16,fp8,0,1.5829360008239746
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,16,8,128,1,float16,fp8,0,2.9478479385375977
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,16,8,128,1,fp8,fp8,0,2.9649471282958983
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,16,8,128,1,float16,float16,0,4.18275032043457
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,16,16,128,1,float16,float16,0,2.8126768112182616
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,16,16,128,1,fp8,fp8,0,1.5797216415405273
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,16,1,128,1,float16,float16,0,1.7464960098266602
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,16,1,128,1,fp8,fp8,0,1.4912287712097168
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,16,1,128,1,float16,fp8,0,1.7718399047851563
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,16,2,128,1,float16,fp8,0,1.5583855628967285
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,16,2,128,1,float16,float16,0,1.8055759429931642
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,16,4,128,1,float16,fp8,0,1.5086511611938476
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,16,2,128,1,fp8,fp8,0,1.871561622619629
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,16,4,128,1,float16,float16,0,1.8869712829589844
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,16,4,128,1,fp8,fp8,0,1.567967987060547
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,16,16,128,1,float16,fp8,0,0.833523178100586
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,16,8,128,1,float16,fp8,0,1.5804719924926758
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,16,8,128,1,fp8,fp8,0,1.5371328353881837
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,16,16,128,1,float16,float16,0,1.4456768035888672
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,16,8,128,1,float16,float16,0,2.041263961791992
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,16,16,128,1,fp8,fp8,0,1.033091163635254
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,16,1,128,1,float16,float16,0,0.9488656044006347
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,16,1,128,1,fp8,fp8,0,0.808460807800293
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,16,1,128,1,float16,fp8,0,0.8320992469787598
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,16,2,128,1,float16,float16,0,0.9569328308105469
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,16,2,128,1,float16,fp8,0,0.8156607627868653
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,16,2,128,1,fp8,fp8,0,0.909115219116211
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,16,4,128,1,float16,float16,0,1.0007712364196777
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,16,4,128,1,float16,fp8,0,0.8280799865722657
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,16,4,128,1,fp8,fp8,0,0.9531231880187988
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,16,8,128,1,float16,fp8,0,0.8132559776306152
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,16,8,128,1,float16,float16,0,1.0677488327026368
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,16,8,128,1,fp8,fp8,0,0.8987343788146973
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,16,1,128,1,float16,fp8,0,3.357126235961914
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,16,1,128,1,fp8,fp8,0,3.3468433380126954
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,16,1,128,1,float16,float16,0,4.032692718505859
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,16,2,128,1,float16,fp8,0,3.3137889862060548
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,16,2,128,1,fp8,fp8,0,3.3032833099365235
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,16,4,128,1,float16,fp8,0,3.37918701171875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,16,2,128,1,float16,float16,0,4.279520034790039
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,16,4,128,1,float16,float16,0,4.579897689819336
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,16,16,128,1,float16,fp8,0,1.768040084838867
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,16,16,128,1,fp8,fp8,0,2.1508832931518556
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,16,4,128,1,fp8,fp8,0,3.3534881591796877
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,16,16,128,1,float16,float16,0,2.8812320709228514
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,16,8,128,1,fp8,fp8,0,3.3324512481689452
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,16,1,128,1,float16,float16,0,2.1228208541870117
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,16,8,128,1,float16,fp8,0,3.696236801147461
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,16,1,128,1,float16,fp8,0,1.6924623489379882
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,16,8,128,1,float16,float16,0,4.983683013916016
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,16,1,128,1,fp8,fp8,0,1.7049215316772461
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,16,2,128,1,float16,fp8,0,1.75316162109375
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,16,2,128,1,float16,float16,0,2.098054313659668
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,16,2,128,1,fp8,fp8,0,1.719425582885742
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,16,4,128,1,float16,fp8,0,1.7295984268188476
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,16,4,128,1,float16,float16,0,2.4169248580932616
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,16,4,128,1,fp8,fp8,0,1.6909183502197265
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,16,8,128,1,float16,fp8,0,1.6931903839111329
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,16,16,128,1,float16,fp8,0,0.9200575828552247
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,16,16,128,1,fp8,fp8,0,0.932801628112793
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,16,8,128,1,fp8,fp8,0,1.7051120758056642
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,16,16,128,1,float16,float16,0,1.7308303833007812
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,16,8,128,1,float16,float16,0,2.5063695907592773
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,16,1,128,1,float16,fp8,0,0.8983632087707519
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,16,1,128,1,float16,float16,0,1.1620287895202637
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,16,1,128,1,fp8,fp8,0,0.8893631935119629
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,16,2,128,1,float16,float16,0,1.0564959526062012
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,16,2,128,1,float16,fp8,0,0.8925871849060059
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,16,2,128,1,fp8,fp8,0,0.8918592453002929
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,16,4,128,1,float16,fp8,0,1.0871392250061036
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,16,4,128,1,float16,float16,0,1.1740608215332031
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,16,4,128,1,fp8,fp8,0,1.0682239532470703
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,16,8,128,1,float16,float16,0,1.2357808113098145
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,16,8,128,1,float16,fp8,0,0.9152352333068847
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,16,8,128,1,fp8,fp8,0,0.8898768424987793
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,16,16,128,1,float16,fp8,0,0.5434000015258789
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,16,16,128,1,fp8,fp8,0,0.595961618423462
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,16,16,128,1,float16,float16,0,0.7823152065277099
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,16,1,128,1,float16,float16,0,0.5762032032012939
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,16,1,128,1,float16,fp8,0,0.5518767833709717
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,16,1,128,1,fp8,fp8,0,0.5025296211242676
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,16,2,128,1,float16,float16,0,0.572208023071289
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,16,2,128,1,float16,fp8,0,0.513812780380249
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,16,2,128,1,fp8,fp8,0,0.5787280082702637
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,16,4,128,1,float16,float16,0,0.6042223930358886
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,16,4,128,1,float16,fp8,0,0.5093264102935791
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,16,4,128,1,fp8,fp8,0,0.5240880012512207
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,16,8,128,1,float16,fp8,0,0.5027696132659912
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,16,8,128,1,float16,float16,0,0.6659728050231933
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,16,8,128,1,fp8,fp8,0,0.4988959789276123
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,16,1,128,1,float16,fp8,0,2.3465808868408202
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,16,1,128,1,fp8,fp8,0,2.3382463455200195
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,16,2,128,1,float16,fp8,0,2.334040069580078
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,16,1,128,1,float16,float16,0,2.8563968658447267
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,16,2,128,1,fp8,fp8,0,2.3459232330322264
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,16,4,128,1,float16,fp8,0,2.3354368209838867
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,16,2,128,1,float16,float16,0,2.8495695114135744
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,16,4,128,1,float16,float16,0,3.1654336929321287
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,16,16,128,1,float16,fp8,0,1.295143985748291
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,16,16,128,1,fp8,fp8,0,1.5730992317199708
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,16,4,128,1,fp8,fp8,0,2.418191909790039
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,16,8,128,1,float16,fp8,0,2.342692756652832
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,16,16,128,1,float16,float16,0,2.1506975173950194
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,16,1,128,1,float16,float16,0,1.5082127571105957
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,16,8,128,1,fp8,fp8,0,2.6910383224487306
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,16,1,128,1,float16,fp8,0,1.216147232055664
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,16,8,128,1,float16,float16,0,3.521547317504883
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,16,1,128,1,fp8,fp8,0,1.351689624786377
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,16,2,128,1,float16,fp8,0,1.2440064430236817
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,16,2,128,1,float16,float16,0,1.4637439727783204
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,16,2,128,1,fp8,fp8,0,1.2242207527160645
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,16,4,128,1,float16,fp8,0,1.2600144386291503
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,16,4,128,1,fp8,fp8,0,1.2122480392456054
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,16,4,128,1,float16,float16,0,1.6845647811889648
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,16,16,128,1,float16,fp8,0,0.6726143836975098
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,16,8,128,1,float16,fp8,0,1.2154352188110351
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,16,8,128,1,fp8,fp8,0,1.2125823974609375
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,16,16,128,1,float16,float16,0,1.1697024345397948
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,16,16,128,1,fp8,fp8,0,0.6805840015411377
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,16,1,128,1,float16,float16,0,0.7415984153747559
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,16,8,128,1,float16,float16,0,1.873244857788086
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,16,1,128,1,fp8,fp8,0,0.645849609375
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,16,1,128,1,float16,fp8,0,0.7757023811340332
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,16,2,128,1,float16,float16,0,0.7662992000579834
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,16,2,128,1,float16,fp8,0,0.6456143856048584
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,16,2,128,1,fp8,fp8,0,0.6969696044921875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,16,4,128,1,float16,fp8,0,0.65939359664917
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,16,4,128,1,float16,float16,0,0.8168767929077149
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,16,4,128,1,fp8,fp8,0,0.7017168045043946
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,16,8,128,1,float16,fp8,0,0.726691198348999
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,16,8,128,1,float16,float16,0,0.9121328353881836
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,16,8,128,1,fp8,fp8,0,0.6845536231994629
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,16,16,128,1,float16,float16,0,0.5980671882629395
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,16,16,128,1,float16,fp8,0,0.3911616086959839
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,16,16,128,1,fp8,fp8,0,0.37746078968048097
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,16,2,128,1,float16,float16,0,0.4203951835632324
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,16,1,128,1,float16,float16,0,0.4054543972015381
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,16,1,128,1,float16,fp8,0,0.40439682006835936
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,16,1,128,1,fp8,fp8,0,0.3769680023193359
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,16,2,128,1,float16,fp8,0,0.374019193649292
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,16,2,128,1,fp8,fp8,0,0.3629744052886963
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,16,4,128,1,float16,float16,0,0.44878721237182617
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,16,4,128,1,float16,fp8,0,0.36350560188293457
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,16,4,128,1,fp8,fp8,0,0.3658031940460205
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,16,8,128,1,float16,fp8,0,0.3742160081863403
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,16,8,128,1,float16,float16,0,0.5198544025421142
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,16,8,128,1,fp8,fp8,0,0.365664005279541
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,16,1,128,1,float16,fp8,0,3.0532127380371095
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,16,1,128,1,fp8,fp8,0,3.0657615661621094
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,16,1,128,1,float16,float16,0,3.574264144897461
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,16,2,128,1,float16,fp8,0,3.063910484313965
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,16,2,128,1,fp8,fp8,0,3.065505599975586
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,16,4,128,1,float16,fp8,0,3.064740753173828
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,16,2,128,1,float16,float16,0,3.9754974365234377
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,16,4,128,1,float16,float16,0,4.166452789306641
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,16,16,128,1,float16,fp8,0,1.6846511840820313
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,16,16,128,1,fp8,fp8,0,2.0030736923217773
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,16,4,128,1,fp8,fp8,0,3.1047952651977537
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,16,8,128,1,fp8,fp8,0,3.1102367401123048
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,16,16,128,1,float16,float16,0,3.027137565612793
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,16,1,128,1,float16,float16,0,1.9127935409545898
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,16,8,128,1,float16,fp8,0,3.5195854187011717
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,16,1,128,1,float16,fp8,0,1.5894175529479981
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,16,8,128,1,float16,float16,0,4.825624084472656
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,16,1,128,1,fp8,fp8,0,1.5990015983581543
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,16,2,128,1,float16,fp8,0,1.5614607810974122
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,16,2,128,1,float16,float16,0,1.9263776779174804
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,16,2,128,1,fp8,fp8,0,1.583027172088623
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,16,4,128,1,float16,fp8,0,1.5894031524658203
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,16,4,128,1,float16,float16,0,2.169304084777832
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,16,4,128,1,fp8,fp8,0,1.5603376388549806
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,16,16,128,1,float16,fp8,0,0.8601200103759765
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,16,8,128,1,float16,fp8,0,1.5974127769470214
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,16,16,128,1,float16,float16,0,1.5385647773742677
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,16,16,128,1,fp8,fp8,0,1.0044464111328124
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,16,1,128,1,float16,float16,0,0.9385711669921875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,16,8,128,1,float16,float16,0,2.3869279861450194
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,16,8,128,1,fp8,fp8,0,1.8664896011352539
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,16,1,128,1,float16,fp8,0,0.8152959823608399
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,16,1,128,1,fp8,fp8,0,0.8291456222534179
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,16,2,128,1,float16,float16,0,0.9795856475830078
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,16,2,128,1,float16,fp8,0,0.9020336151123047
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,16,2,128,1,fp8,fp8,0,0.912656021118164
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,16,4,128,1,float16,fp8,0,0.8459136009216308
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,16,4,128,1,fp8,fp8,0,0.8120575904846191
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,16,4,128,1,float16,float16,0,1.0796367645263671
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,16,8,128,1,float16,fp8,0,0.8852160453796387
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,16,16,128,1,float16,fp8,0,0.4789919853210449
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,16,8,128,1,float16,float16,0,1.2153440475463868
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,16,8,128,1,fp8,fp8,0,0.8661567687988281
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,16,16,128,1,float16,float16,0,0.8137071609497071
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,16,16,128,1,fp8,fp8,0,0.4590127944946289
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,16,1,128,1,float16,float16,0,0.5054719924926758
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,16,1,128,1,float16,fp8,0,0.4562543869018555
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,16,1,128,1,fp8,fp8,0,0.45568161010742186
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,16,2,128,1,float16,float16,0,0.5334271907806396
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,16,2,128,1,float16,fp8,0,0.44480161666870116
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,16,2,128,1,fp8,fp8,0,0.4417263984680176
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,16,4,128,1,float16,float16,0,0.5600319862365722
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,16,4,128,1,float16,fp8,0,0.4406735897064209
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,16,4,128,1,fp8,fp8,0,0.4474031925201416
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,16,16,128,1,float16,fp8,0,0.2661119937896729
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,16,8,128,1,float16,fp8,0,0.4416111946105957
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,16,8,128,1,float16,float16,0,0.6419023990631103
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,16,8,128,1,fp8,fp8,0,0.44557600021362304
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,16,16,128,1,float16,float16,0,0.4356239795684814
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,16,16,128,1,fp8,fp8,0,0.26598880290985105
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,16,1,128,1,float16,float16,0,0.2785056114196777
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,16,1,128,1,float16,fp8,0,0.25494558811187745
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,16,1,128,1,fp8,fp8,0,0.25534560680389407
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,16,2,128,1,float16,float16,0,0.2873296022415161
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,16,2,128,1,float16,fp8,0,0.25775039196014404
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,16,8,128,1,float16,float16,0,0.3552544116973877
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,16,2,128,1,fp8,fp8,0,0.255676794052124
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,16,4,128,1,float16,float16,0,0.3129503965377808
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,16,4,128,1,float16,fp8,0,0.2548543930053711
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,16,4,128,1,fp8,fp8,0,0.25427999496459963
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,16,8,128,1,float16,fp8,0,0.2547231912612915
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,16,8,128,1,fp8,fp8,0,0.2539936065673828
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,16,1,128,1,float16,fp8,0,1.8084352493286133
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,16,1,128,1,fp8,fp8,0,1.8043920516967773
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,16,1,128,1,float16,float16,0,2.1110591888427734
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,16,2,128,1,float16,fp8,0,1.8084495544433594
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,16,2,128,1,fp8,fp8,0,1.805463981628418
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,16,2,128,1,float16,float16,0,2.2995983123779298
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,16,4,128,1,float16,fp8,0,1.805678367614746
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,16,4,128,1,float16,float16,0,2.48437442779541
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,16,4,128,1,fp8,fp8,0,1.827168083190918
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,16,16,128,1,float16,fp8,0,1.2156479835510254
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,16,16,128,1,fp8,fp8,0,0.9842240333557128
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,16,8,128,1,fp8,fp8,0,1.8099119186401367
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,16,8,128,1,float16,fp8,0,2.0943248748779295
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,16,16,128,1,float16,float16,0,1.971659278869629
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,16,1,128,1,float16,float16,0,1.104372787475586
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,16,1,128,1,float16,fp8,0,0.9442543983459473
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,16,8,128,1,float16,float16,0,2.9830144882202148
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,16,1,128,1,fp8,fp8,0,1.1832639694213867
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,16,2,128,1,float16,fp8,0,0.9485391616821289
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,16,2,128,1,float16,float16,0,1.142414379119873
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,16,2,128,1,fp8,fp8,0,0.9655695915222168
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,16,4,128,1,float16,fp8,0,1.0538496017456054
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,16,4,128,1,fp8,fp8,0,0.9342592239379883
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,16,4,128,1,float16,float16,0,1.2558704376220704
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,16,16,128,1,float16,fp8,0,0.5232192039489746
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,16,8,128,1,fp8,fp8,0,0.9355695724487305
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,16,8,128,1,float16,fp8,0,1.0412863731384276
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,16,16,128,1,float16,float16,0,1.0472047805786133
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,16,16,128,1,fp8,fp8,0,0.5518015861511231
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,16,1,128,1,float16,fp8,0,0.5343887805938721
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,16,8,128,1,float16,float16,0,1.5318639755249024
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,16,1,128,1,float16,float16,0,0.5629983901977539
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,16,1,128,1,fp8,fp8,0,0.4949711799621582
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,16,2,128,1,float16,float16,0,0.5916399955749512
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,16,2,128,1,float16,fp8,0,0.4954495906829834
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,16,2,128,1,fp8,fp8,0,0.5075632095336914
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,16,4,128,1,float16,float16,0,0.6628528118133545
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,16,4,128,1,float16,fp8,0,0.5121615886688232
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,16,4,128,1,fp8,fp8,0,0.5001696109771728
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,16,8,128,1,float16,fp8,0,0.49427042007446287
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,16,8,128,1,float16,float16,0,0.7706768035888671
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,16,8,128,1,fp8,fp8,0,0.49389119148254396
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,16,16,128,1,float16,fp8,0,0.2900752067565918
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,16,16,128,1,float16,float16,0,0.5344272136688233
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,16,16,128,1,fp8,fp8,0,0.295795202255249
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,16,1,128,1,float16,float16,0,0.31092960834503175
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,16,1,128,1,float16,fp8,0,0.2734960079193115
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,16,1,128,1,fp8,fp8,0,0.2744704008102417
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,16,2,128,1,float16,float16,0,0.32359359264373777
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,16,2,128,1,float16,fp8,0,0.27315359115600585
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,16,2,128,1,fp8,fp8,0,0.27258079051971434
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,16,8,128,1,float16,float16,0,0.41505279541015627
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,16,4,128,1,float16,float16,0,0.35493760108947753
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,16,4,128,1,float16,fp8,0,0.27333118915557864
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,16,4,128,1,fp8,fp8,0,0.27261440753936766
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,16,8,128,1,float16,fp8,0,0.27470080852508544
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,16,8,128,1,fp8,fp8,0,0.27572319507598875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,16,16,128,1,float16,float16,0,0.29530398845672606
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,16,16,128,1,float16,fp8,0,0.17142080068588256
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,16,16,128,1,fp8,fp8,0,0.17002559900283815
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,16,1,128,1,float16,float16,0,0.18018239736557007
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,16,1,128,1,float16,fp8,0,0.1621407985687256
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,16,1,128,1,fp8,fp8,0,0.16265920400619507
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,16,2,128,1,float16,float16,0,0.18495359420776367
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,16,2,128,1,float16,fp8,0,0.1625615954399109
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,16,2,128,1,fp8,fp8,0,0.16418080329895018
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,16,4,128,1,float16,float16,0,0.19709759950637817
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,16,4,128,1,float16,fp8,0,0.16287519931793212
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,16,4,128,1,fp8,fp8,0,0.16260960102081298
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,16,8,128,1,float16,float16,0,0.2317967891693115
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,16,8,128,1,float16,fp8,0,0.16255040168762208
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,16,8,128,1,fp8,fp8,0,0.16325759887695312
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,16,1,128,1,float16,fp8,0,1.7371919631958008
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,16,1,128,1,fp8,fp8,0,1.7362592697143555
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,16,1,128,1,float16,float16,0,2.0426864624023438
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,16,2,128,1,float16,fp8,0,1.7386383056640624
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,16,2,128,1,fp8,fp8,0,1.7397584915161133
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,16,2,128,1,float16,float16,0,2.1885679244995115
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,16,4,128,1,float16,float16,0,2.5271167755126953
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,16,4,128,1,float16,fp8,0,1.7381135940551757
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,16,16,128,1,float16,fp8,0,1.1008912086486817
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,16,4,128,1,fp8,fp8,0,1.736814308166504
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,16,8,128,1,float16,fp8,0,1.73712158203125
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,16,8,128,1,fp8,fp8,0,1.7416288375854492
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,16,16,128,1,fp8,fp8,0,1.0751919746398926
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,16,16,128,1,float16,float16,0,2.2176095962524416
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,16,1,128,1,float16,float16,0,1.0304623603820802
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,16,1,128,1,float16,fp8,0,1.0206208229064941
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,16,1,128,1,fp8,fp8,0,0.8957391738891601
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,16,2,128,1,float16,fp8,0,0.8930975914001464
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,16,2,128,1,float16,float16,0,1.1009840011596679
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,16,8,128,1,float16,float16,0,3.3537296295166015
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,16,2,128,1,fp8,fp8,0,0.9108448028564453
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,16,4,128,1,float16,fp8,0,0.89859037399292
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,16,4,128,1,fp8,fp8,0,0.9184111595153809
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,16,4,128,1,float16,float16,0,1.2648688316345216
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,16,8,128,1,float16,fp8,0,0.8906448364257813
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,16,16,128,1,float16,fp8,0,0.5041888236999512
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,16,8,128,1,fp8,fp8,0,0.9806015968322754
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,16,16,128,1,fp8,fp8,0,0.5212719917297364
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,16,1,128,1,float16,float16,0,0.5306528091430665
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,16,16,128,1,float16,float16,0,1.1998767852783203
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,16,1,128,1,float16,fp8,0,0.49248480796813965
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,16,8,128,1,float16,float16,0,1.5881055831909179
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,16,1,128,1,fp8,fp8,0,0.4652143955230713
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,16,2,128,1,float16,float16,0,0.5736671924591065
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,16,2,128,1,float16,fp8,0,0.47746877670288085
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,16,4,128,1,fp8,fp8,0,0.4729487895965576
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,16,2,128,1,fp8,fp8,0,0.4781328201293945
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,16,4,128,1,float16,float16,0,0.650819206237793
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,16,8,128,1,float16,float16,0,0.8110287666320801
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,16,4,128,1,float16,fp8,0,0.5087632179260254
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,16,8,128,1,float16,fp8,0,0.46520161628723145
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,16,8,128,1,fp8,fp8,0,0.46572318077087405
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,16,16,128,1,float16,fp8,0,0.27387199401855467
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,16,16,128,1,float16,float16,0,0.5913072109222413
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,16,16,128,1,fp8,fp8,0,0.27273600101470946
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,16,1,128,1,float16,float16,0,0.2923919916152954
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,16,1,128,1,float16,fp8,0,0.2528608083724976
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,16,4,128,1,float16,float16,0,0.34856319427490234
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,16,1,128,1,fp8,fp8,0,0.2546351909637451
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,16,2,128,1,float16,float16,0,0.30670719146728515
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,16,2,128,1,float16,fp8,0,0.25258560180664064
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,16,2,128,1,fp8,fp8,0,0.2526544094085693
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,16,4,128,1,float16,fp8,0,0.252675199508667
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,16,4,128,1,fp8,fp8,0,0.2525727987289429
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,16,8,128,1,float16,fp8,0,0.2543312072753906
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,16,8,128,1,float16,float16,0,0.42684478759765626
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,16,8,128,1,fp8,fp8,0,0.2554624080657959
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,16,16,128,1,float16,fp8,0,0.1568112015724182
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,16,16,128,1,float16,float16,0,0.31737279891967773
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,16,16,128,1,fp8,fp8,0,0.1565600037574768
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,16,1,128,1,float16,float16,0,0.16330720186233522
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,16,1,128,1,float16,fp8,0,0.1462000012397766
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,16,1,128,1,fp8,fp8,0,0.14564319849014282
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,16,2,128,1,float16,float16,0,0.17032480239868164
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,16,2,128,1,float16,fp8,0,0.1476032018661499
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,16,2,128,1,fp8,fp8,0,0.14637919664382934
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,16,4,128,1,float16,float16,0,0.19485599994659425
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,16,4,128,1,float16,fp8,0,0.14614239931106568
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,16,4,128,1,fp8,fp8,0,0.14582079648971558
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,16,8,128,1,float16,float16,0,0.23813440799713134
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,16,8,128,1,float16,fp8,0,0.14625760316848754
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,16,8,128,1,fp8,fp8,0,0.1462224006652832
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,16,16,128,1,float16,float16,0,0.18107839822769164
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,16,16,128,1,float16,fp8,0,0.09913600087165833
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,16,16,128,1,fp8,fp8,0,0.09795519709587097
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,16,1,128,1,float16,float16,0,0.10688159465789795
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,16,2,128,1,fp8,fp8,0,0.09497280120849609
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,16,1,128,1,float16,fp8,0,0.09422720074653626
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,16,1,128,1,fp8,fp8,0,0.09456160068511962
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,16,2,128,1,float16,float16,0,0.11153119802474976
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,16,8,128,1,float16,float16,0,0.13238400220870972
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,16,2,128,1,float16,fp8,0,0.09382719993591308
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,16,4,128,1,float16,float16,0,0.1171455979347229
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,16,4,128,1,float16,fp8,0,0.09467039704322815
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,16,4,128,1,fp8,fp8,0,0.09470720291137695
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,16,8,128,1,float16,fp8,0,0.09506719708442687
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,16,8,128,1,fp8,fp8,0,0.09457119703292846
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,16,1,128,1,float16,fp8,0,1.0639360427856446
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,16,1,128,1,float16,float16,0,1.2303423881530762
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,16,1,128,1,fp8,fp8,0,1.0653440475463867
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,16,2,128,1,float16,fp8,0,1.0652560234069823
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,16,2,128,1,float16,float16,0,1.3437487602233886
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,16,2,128,1,fp8,fp8,0,1.0647952079772949
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,16,4,128,1,float16,float16,0,1.5896703720092773
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,16,4,128,1,float16,fp8,0,1.0628640174865722
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,16,4,128,1,fp8,fp8,0,1.1558896064758302
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,16,8,128,1,float16,fp8,0,1.0647503852844238
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,16,8,128,1,fp8,fp8,0,1.0985247611999511
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,16,16,128,1,float16,fp8,0,0.6033455848693847
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,16,16,128,1,fp8,fp8,0,0.6295951843261719
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,16,1,128,1,float16,float16,0,0.6269599914550781
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,16,8,128,1,float16,float16,0,2.0626720428466796
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,16,1,128,1,float16,fp8,0,0.57740478515625
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,16,16,128,1,float16,float16,0,1.5218031883239747
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,16,2,128,1,float16,fp8,0,0.5489007949829101
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,16,1,128,1,fp8,fp8,0,0.5494095802307128
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,16,2,128,1,float16,float16,0,0.6886015892028808
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,16,2,128,1,fp8,fp8,0,0.5574528217315674
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,16,4,128,1,float16,fp8,0,0.5488944053649902
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,16,4,128,1,float16,float16,0,0.8066399574279786
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,16,4,128,1,fp8,fp8,0,0.567519998550415
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,16,8,128,1,float16,fp8,0,0.5493103981018066
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,16,8,128,1,fp8,fp8,0,0.5505248069763183
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,16,16,128,1,float16,fp8,0,0.3231647968292236
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,16,8,128,1,float16,float16,0,1.0482864379882812
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,16,16,128,1,fp8,fp8,0,0.31812319755554197
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,16,1,128,1,float16,float16,0,0.3391103982925415
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,16,16,128,1,float16,float16,0,0.7833856105804443
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,16,2,128,1,fp8,fp8,0,0.29057118892669676
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,16,1,128,1,float16,fp8,0,0.2903759956359863
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,16,1,128,1,fp8,fp8,0,0.2921168088912964
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,16,2,128,1,float16,float16,0,0.3608896017074585
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,16,2,128,1,float16,fp8,0,0.2921087980270386
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,16,4,128,1,float16,float16,0,0.4197103977203369
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,16,4,128,1,float16,fp8,0,0.29189438819885255
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,16,4,128,1,fp8,fp8,0,0.29087998867034914
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,16,8,128,1,float16,fp8,0,0.2907776117324829
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,16,8,128,1,float16,float16,0,0.5396128177642823
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,16,8,128,1,fp8,fp8,0,0.29353439807891846
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,16,1,128,1,fp8,fp8,0,0.1603600025177002
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,16,16,128,1,float16,fp8,0,0.17614079713821412
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,16,16,128,1,float16,float16,0,0.41001601219177247
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,16,16,128,1,fp8,fp8,0,0.17638399600982665
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,16,1,128,1,float16,float16,0,0.18829439878463744
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,16,1,128,1,float16,fp8,0,0.1608031988143921
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,16,2,128,1,float16,float16,0,0.20179998874664307
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,16,2,128,1,float16,fp8,0,0.16305760145187378
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,16,8,128,1,float16,float16,0,0.289849591255188
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,16,2,128,1,fp8,fp8,0,0.16093120574951172
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,16,4,128,1,float16,float16,0,0.23127360343933107
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,16,4,128,1,float16,fp8,0,0.1613327980041504
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,16,4,128,1,fp8,fp8,0,0.1614575982093811
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,16,8,128,1,float16,fp8,0,0.16268479824066162
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,16,8,128,1,fp8,fp8,0,0.16261279582977295
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,16,16,128,1,float16,fp8,0,0.1045807957649231
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,16,16,128,1,float16,float16,0,0.22708640098571778
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,16,16,128,1,fp8,fp8,0,0.10343999862670898
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,16,1,128,1,float16,float16,0,0.11193759441375732
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,16,1,128,1,float16,fp8,0,0.09689919948577881
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,16,1,128,1,fp8,fp8,0,0.09698079824447632
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,16,2,128,1,float16,float16,0,0.11615200042724609
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,16,2,128,1,float16,fp8,0,0.09678400158882142
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,16,2,128,1,fp8,fp8,0,0.09680799841880798
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,16,4,128,1,float16,float16,0,0.12824480533599852
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,16,4,128,1,float16,fp8,0,0.09646400213241577
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,16,4,128,1,fp8,fp8,0,0.09729120135307312
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,16,1,128,1,float16,float16,0,0.07567840218544006
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,16,8,128,1,float16,float16,0,0.16281599998474122
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,16,8,128,1,float16,fp8,0,0.09705119729042053
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,16,8,128,1,fp8,fp8,0,0.09683359861373901
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,16,16,128,1,float16,float16,0,0.11614400148391724
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,16,16,128,1,float16,fp8,0,0.06707680225372314
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,16,16,128,1,fp8,fp8,0,0.06784319877624512
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,16,1,128,1,float16,fp8,0,0.06424639821052551
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,16,1,128,1,fp8,fp8,0,0.06431679725646973
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,16,2,128,1,float16,float16,0,0.07934719920158387
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,16,2,128,1,float16,fp8,0,0.06396160125732422
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,16,2,128,1,fp8,fp8,0,0.06372960209846497
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,16,4,128,1,float16,float16,0,0.08463039994239807
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,16,4,128,1,float16,fp8,0,0.0638256013393402
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,16,4,128,1,fp8,fp8,0,0.06459519863128663
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,16,8,128,1,float16,float16,0,0.09403839707374573
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,16,8,128,1,float16,fp8,0,0.06446719765663148
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,16,8,128,1,fp8,fp8,0,0.06419519782066345
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,16,1,128,1,float16,fp8,0,1.0770223617553711
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,16,1,128,1,float16,float16,0,1.2316255569458008
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,16,1,128,1,fp8,fp8,0,1.07741117477417
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,16,2,128,1,float16,fp8,0,1.0781408309936524
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,16,2,128,1,fp8,fp8,0,1.0752127647399903
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,16,2,128,1,float16,float16,0,1.3963647842407227
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,16,4,128,1,float16,fp8,0,1.0777008056640625
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,16,4,128,1,float16,float16,0,1.726580810546875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,16,4,128,1,fp8,fp8,0,1.077841567993164
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,16,8,128,1,float16,fp8,0,1.077131175994873
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,16,8,128,1,fp8,fp8,0,1.0765839576721192
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,16,16,128,1,float16,fp8,0,0.6225887775421143
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,16,16,128,1,fp8,fp8,0,0.6378399848937988
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,16,1,128,1,float16,float16,0,0.626527976989746
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,16,1,128,1,float16,fp8,0,0.550929594039917
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,16,16,128,1,float16,float16,0,1.8466352462768554
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,16,1,128,1,fp8,fp8,0,0.5495376110076904
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,16,8,128,1,float16,float16,0,2.3563743591308595
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,16,2,128,1,float16,float16,0,0.7080783843994141
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,16,2,128,1,float16,fp8,0,0.5528800010681152
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,16,2,128,1,fp8,fp8,0,0.5630943775177002
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,16,4,128,1,float16,fp8,0,0.5510879993438721
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,16,4,128,1,fp8,fp8,0,0.5497039794921875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,16,4,128,1,float16,float16,0,0.870792007446289
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,16,8,128,1,float16,fp8,0,0.5521344184875489
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,16,8,128,1,fp8,fp8,0,0.5527904033660889
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,16,16,128,1,float16,fp8,0,0.3238368034362793
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,16,1,128,1,float16,float16,0,0.3296096086502075
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,16,16,128,1,fp8,fp8,0,0.3239264011383057
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,16,1,128,1,float16,fp8,0,0.2883968114852905
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,16,8,128,1,float16,float16,0,1.1892767906188966
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,16,2,128,1,float16,fp8,0,0.28913280963897703
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,16,1,128,1,fp8,fp8,0,0.28781280517578123
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,16,16,128,1,float16,float16,0,0.9279935836791993
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,16,4,128,1,float16,float16,0,0.44881439208984375
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,16,2,128,1,float16,float16,0,0.36757121086120603
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,16,2,128,1,fp8,fp8,0,0.28739678859710693
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,16,4,128,1,float16,fp8,0,0.28788959980010986
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,16,4,128,1,fp8,fp8,0,0.2893791913986206
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,16,8,128,1,float16,fp8,0,0.2890336036682129
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,16,16,128,1,fp8,fp8,0,0.17571680545806884
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,16,8,128,1,fp8,fp8,0,0.2890383958816528
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,16,8,128,1,float16,float16,0,0.6093920230865478
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,16,16,128,1,float16,fp8,0,0.17560319900512694
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,16,16,128,1,float16,float16,0,0.4786655902862549
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,16,1,128,1,float16,float16,0,0.18538880348205566
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,16,1,128,1,float16,fp8,0,0.15632959604263305
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,16,1,128,1,fp8,fp8,0,0.15636639595031737
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,16,2,128,1,float16,fp8,0,0.15691360235214233
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,16,2,128,1,fp8,fp8,0,0.1569648027420044
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,16,8,128,1,float16,fp8,0,0.15789599418640138
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,16,4,128,1,float16,float16,0,0.24221599102020264
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,16,2,128,1,float16,float16,0,0.20292000770568847
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,16,4,128,1,float16,fp8,0,0.15688799619674682
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,16,4,128,1,fp8,fp8,0,0.1587455987930298
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,16,1,128,1,float16,float16,0,0.10714559555053711
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,16,8,128,1,float16,float16,0,0.31988480091094973
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,16,8,128,1,fp8,fp8,0,0.1578719973564148
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,16,16,128,1,float16,float16,0,0.25691521167755127
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,16,16,128,1,float16,fp8,0,0.10131360292434692
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,16,16,128,1,fp8,fp8,0,0.10134079456329345
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,16,1,128,1,float16,fp8,0,0.09075199961662292
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,16,1,128,1,fp8,fp8,0,0.09107199907302857
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,16,2,128,1,float16,float16,0,0.11314879655838013
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,16,2,128,1,float16,fp8,0,0.09098399877548217
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,16,2,128,1,fp8,fp8,0,0.09053120017051697
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,16,4,128,1,float16,float16,0,0.13538880348205568
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,16,4,128,1,float16,fp8,0,0.09103839993476867
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,16,4,128,1,fp8,fp8,0,0.09055200219154358
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,16,8,128,1,float16,float16,0,0.1793056011199951
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,16,8,128,1,float16,fp8,0,0.09175040125846863
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,16,8,128,1,fp8,fp8,0,0.09111840128898621
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,16,16,128,1,float16,float16,0,0.14050079584121705
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,16,2,128,1,float16,fp8,0,0.05757279992103577
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,16,16,128,1,float16,fp8,0,0.06154559850692749
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,16,16,128,1,fp8,fp8,0,0.06177759766578674
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,16,1,128,1,float16,float16,0,0.0689791977405548
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,16,4,128,1,fp8,fp8,0,0.05761600136756897
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,16,1,128,1,float16,fp8,0,0.05760319828987122
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,16,1,128,1,fp8,fp8,0,0.05742719769477844
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,16,2,128,1,float16,float16,0,0.07486079931259156
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,16,2,128,1,fp8,fp8,0,0.057652801275253296
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,16,4,128,1,float16,float16,0,0.080103999376297
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,16,4,128,1,float16,fp8,0,0.05756639838218689
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,16,8,128,1,float16,float16,0,0.09434559941291809
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,16,8,128,1,float16,fp8,0,0.05780959725379944
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,16,8,128,1,fp8,fp8,0,0.05755680203437805
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,16,16,128,1,float16,float16,0,0.07193120121955872
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,16,2,128,1,float16,fp8,0,0.03701280057430267
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,16,16,128,1,float16,fp8,0,0.03917439877986908
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,16,16,128,1,fp8,fp8,0,0.03934400081634522
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,16,1,128,1,float16,float16,0,0.047312000393867494
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,16,1,128,1,float16,fp8,0,0.037031999230384825
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,16,1,128,1,fp8,fp8,0,0.03700479865074158
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,16,2,128,1,float16,float16,0,0.04744159877300262
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,16,2,128,1,fp8,fp8,0,0.03709760010242462
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,16,4,128,1,float16,float16,0,0.05289919972419739
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,16,4,128,1,float16,fp8,0,0.037064000964164734
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,16,4,128,1,fp8,fp8,0,0.0370959997177124
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,16,8,128,1,float16,float16,0,0.05856159925460815
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,16,8,128,1,float16,fp8,0,0.03718400001525879
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,16,8,128,1,fp8,fp8,0,0.03705919981002807
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,16,1,128,1,float16,float16,0,0.7888048171997071
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,16,1,128,1,float16,fp8,0,0.6889039993286132
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,16,1,128,1,fp8,fp8,0,0.6879199981689453
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,16,2,128,1,float16,float16,0,0.9051103591918945
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,16,2,128,1,float16,fp8,0,0.6892960071563721
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,16,2,128,1,fp8,fp8,0,0.6883039951324463
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,16,4,128,1,float16,fp8,0,0.702182388305664
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,16,4,128,1,fp8,fp8,0,0.686840009689331
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,16,4,128,1,float16,float16,0,1.1440464019775392
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,16,16,128,1,float16,fp8,0,0.40943517684936526
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,16,8,128,1,fp8,fp8,0,0.6898928165435791
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,16,8,128,1,float16,fp8,0,0.6890016078948975
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,16,16,128,1,fp8,fp8,0,0.4075119972229004
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,16,1,128,1,float16,float16,0,0.4029551982879639
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,16,1,128,1,float16,fp8,0,0.3541264057159424
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,16,8,128,1,float16,float16,0,1.6285760879516602
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,16,1,128,1,fp8,fp8,0,0.3550879955291748
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,16,16,128,1,float16,float16,0,1.2976703643798828
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,16,2,128,1,fp8,fp8,0,0.3544255971908569
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,16,2,128,1,float16,fp8,0,0.35343520641326903
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,16,4,128,1,float16,fp8,0,0.3546799898147583
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,16,2,128,1,float16,float16,0,0.4636832237243652
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,16,4,128,1,float16,float16,0,0.5858799934387207
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,16,4,128,1,fp8,fp8,0,0.3551136016845703
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,16,8,128,1,float16,fp8,0,0.3556607961654663
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,16,8,128,1,fp8,fp8,0,0.35484321117401124
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,16,16,128,1,float16,fp8,0,0.2153264045715332
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,16,16,128,1,fp8,fp8,0,0.21658239364624024
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,16,8,128,1,float16,float16,0,0.8248031616210938
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,16,1,128,1,float16,float16,0,0.21931519508361816
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,16,16,128,1,float16,float16,0,0.6634479999542237
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,16,1,128,1,float16,fp8,0,0.18751519918441772
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,16,4,128,1,float16,float16,0,0.30629920959472656
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,16,4,128,1,fp8,fp8,0,0.1880944013595581
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,16,4,128,1,float16,fp8,0,0.1887168049812317
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,16,1,128,1,fp8,fp8,0,0.1879536032676697
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,16,8,128,1,fp8,fp8,0,0.18921600580215453
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,16,2,128,1,float16,float16,0,0.2476639986038208
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,16,2,128,1,float16,fp8,0,0.18835519552230834
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,16,2,128,1,fp8,fp8,0,0.19003039598464966
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,16,8,128,1,float16,fp8,0,0.18914400339126586
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,16,1,128,1,float16,fp8,0,0.10438719987869263
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,16,8,128,1,float16,float16,0,0.42600321769714355
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,16,16,128,1,float16,fp8,0,0.11951520442962646
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,16,16,128,1,float16,float16,0,0.34768478870391845
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,16,16,128,1,fp8,fp8,0,0.11981600522994995
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,16,1,128,1,float16,float16,0,0.12899359464645385
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,16,1,128,1,fp8,fp8,0,0.10529760122299195
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,16,2,128,1,float16,float16,0,0.1430191993713379
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,16,2,128,1,float16,fp8,0,0.10428479909896851
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,16,2,128,1,fp8,fp8,0,0.10457439422607422
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,16,8,128,1,fp8,fp8,0,0.10553439855575561
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,16,4,128,1,float16,float16,0,0.17099839448928833
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,16,4,128,1,float16,fp8,0,0.10452640056610107
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,16,4,128,1,fp8,fp8,0,0.10471199750900269
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,16,8,128,1,float16,float16,0,0.2297136068344116
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,16,8,128,1,float16,fp8,0,0.10700479745864869
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,16,16,128,1,float16,fp8,0,0.06905120015144348
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,16,16,128,1,float16,float16,0,0.1890336036682129
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,16,16,128,1,fp8,fp8,0,0.06964160203933716
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,16,1,128,1,float16,float16,0,0.07605440020561219
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,16,1,128,1,float16,fp8,0,0.06233919858932495
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,16,1,128,1,fp8,fp8,0,0.06226720213890076
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,16,2,128,1,float16,float16,0,0.08081600069999695
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,16,8,128,1,float16,float16,0,0.12354079484939576
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,16,8,128,1,float16,fp8,0,0.06248800158500671
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,16,2,128,1,float16,fp8,0,0.06361280083656311
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,16,2,128,1,fp8,fp8,0,0.062116801738739014
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,16,4,128,1,float16,float16,0,0.09504960179328918
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,16,4,128,1,float16,fp8,0,0.061900800466537474
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,16,4,128,1,fp8,fp8,0,0.0623088002204895
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,16,8,128,1,fp8,fp8,0,0.06258400082588196
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,16,16,128,1,float16,fp8,0,0.04475359916687012
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,16,16,128,1,float16,float16,0,0.09284480214118958
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,16,16,128,1,fp8,fp8,0,0.04482080042362213
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,16,1,128,1,float16,float16,0,0.05186079740524292
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,16,1,128,1,float16,fp8,0,0.041203200817108154
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,16,1,128,1,fp8,fp8,0,0.04110719859600067
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,16,2,128,1,float16,float16,0,0.055553597211837766
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,16,2,128,1,float16,fp8,0,0.04116480052471161
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,16,2,128,1,fp8,fp8,0,0.04121600091457367
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,16,4,128,1,float16,float16,0,0.060070401430130003
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,16,4,128,1,float16,fp8,0,0.04123519957065582
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,16,4,128,1,fp8,fp8,0,0.04118399918079376
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,16,8,128,1,float16,float16,0,0.07027199864387512
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,16,8,128,1,float16,fp8,0,0.04116480052471161
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,16,8,128,1,fp8,fp8,0,0.041176000237464906
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,16,16,128,1,float16,float16,0,0.05956000089645386
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,16,16,128,1,float16,fp8,0,0.031297600269317626
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,16,16,128,1,fp8,fp8,0,0.031097599864006044
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,16,1,128,1,float16,float16,0,0.040822398662567136
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,16,1,128,1,float16,fp8,0,0.030291199684143066
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,16,1,128,1,fp8,fp8,0,0.0295632004737854
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,16,2,128,1,float16,float16,0,0.04110080003738403
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,16,2,128,1,float16,fp8,0,0.029123198986053467
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,16,2,128,1,fp8,fp8,0,0.028942400217056276
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,16,4,128,1,float16,float16,0,0.0435808002948761
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,16,4,128,1,float16,fp8,0,0.028918400406837463
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,16,4,128,1,fp8,fp8,0,0.029067200422286988
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,16,8,128,1,float16,float16,0,0.04952639937400818
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,16,8,128,1,float16,fp8,0,0.029443201422691346
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,16,8,128,1,fp8,fp8,0,0.030675199627876282
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,16,1,128,1,float16,float16,0,0.8422143936157227
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,16,1,128,1,float16,fp8,0,0.7449647903442382
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,16,1,128,1,fp8,fp8,0,0.7458288192749023
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,16,2,128,1,float16,fp8,0,0.7451824188232422
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,16,2,128,1,float16,float16,0,1.0096943855285645
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,16,2,128,1,fp8,fp8,0,0.7452943801879883
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,16,4,128,1,float16,fp8,0,0.7447792053222656
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,16,4,128,1,fp8,fp8,0,0.7429264068603516
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,16,4,128,1,float16,float16,0,1.331379222869873
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,16,8,128,1,float16,fp8,0,0.7471231937408447
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,16,8,128,1,fp8,fp8,0,0.7475344181060791
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,16,16,128,1,float16,fp8,0,0.4518671989440918
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,16,16,128,1,fp8,fp8,0,0.4525775909423828
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,16,1,128,1,float16,float16,0,0.43383359909057617
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,16,1,128,1,float16,fp8,0,0.38076798915863036
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,16,1,128,1,fp8,fp8,0,0.38157761096954346
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,16,2,128,1,float16,float16,0,0.5126959800720214
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,16,2,128,1,float16,fp8,0,0.38129758834838867
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,16,8,128,1,float16,float16,0,1.9731103897094726
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,16,16,128,1,float16,float16,0,1.6229183197021484
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,16,4,128,1,float16,fp8,0,0.38240799903869627
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,16,2,128,1,fp8,fp8,0,0.38120479583740235
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,16,4,128,1,float16,float16,0,0.6751632213592529
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,16,4,128,1,fp8,fp8,0,0.3813119888305664
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,16,8,128,1,float16,fp8,0,0.3819024085998535
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,16,8,128,1,fp8,fp8,0,0.38166561126708987
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,16,16,128,1,float16,fp8,0,0.23553760051727296
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,16,16,128,1,fp8,fp8,0,0.23496320247650146
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,16,1,128,1,float16,float16,0,0.23196001052856446
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,16,8,128,1,float16,float16,0,0.9943103790283203
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,16,2,128,1,fp8,fp8,0,0.19935359954833984
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,16,1,128,1,float16,fp8,0,0.19916800260543824
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,16,16,128,1,float16,float16,0,0.8248448371887207
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,16,1,128,1,fp8,fp8,0,0.19879839420318604
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,16,2,128,1,float16,float16,0,0.27085280418395996
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,16,2,128,1,float16,fp8,0,0.1991647958755493
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,16,4,128,1,float16,fp8,0,0.19947359561920167
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,16,4,128,1,float16,float16,0,0.3498512029647827
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,16,16,128,1,float16,fp8,0,0.12670079469680787
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,16,4,128,1,fp8,fp8,0,0.20008959770202636
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,16,8,128,1,float16,fp8,0,0.20211679935455323
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,16,8,128,1,fp8,fp8,0,0.20010719299316407
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,16,8,128,1,float16,float16,0,0.5090223789215088
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,16,16,128,1,fp8,fp8,0,0.1272447943687439
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,16,16,128,1,float16,float16,0,0.42513279914855956
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,16,1,128,1,float16,float16,0,0.13244479894638062
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,16,4,128,1,float16,float16,0,0.19086400270462037
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,16,1,128,1,float16,fp8,0,0.10783519744873046
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,16,1,128,1,fp8,fp8,0,0.10744800567626953
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,16,2,128,1,float16,float16,0,0.1535215973854065
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,16,2,128,1,float16,fp8,0,0.10877120494842529
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,16,2,128,1,fp8,fp8,0,0.10814399719238281
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,16,4,128,1,float16,fp8,0,0.1088479995727539
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,16,4,128,1,fp8,fp8,0,0.10837759971618652
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,16,8,128,1,float16,float16,0,0.2676944017410278
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,16,8,128,1,float16,fp8,0,0.10956640243530273
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,16,8,128,1,fp8,fp8,0,0.10906879901885987
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,16,16,128,1,float16,fp8,0,0.0720575988292694
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,16,16,128,1,float16,float16,0,0.2260672092437744
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,16,2,128,1,float16,fp8,0,0.061715197563171384
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,16,16,128,1,fp8,fp8,0,0.07235040068626404
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,16,1,128,1,float16,float16,0,0.07574719786643982
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,16,1,128,1,float16,fp8,0,0.061596798896789554
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,16,1,128,1,fp8,fp8,0,0.06209279894828797
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,16,2,128,1,float16,float16,0,0.08354719877243041
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,16,2,128,1,fp8,fp8,0,0.06161119937896729
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,16,4,128,1,float16,float16,0,0.1076464056968689
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,16,4,128,1,float16,fp8,0,0.06182399988174438
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,16,4,128,1,fp8,fp8,0,0.0618399977684021
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,16,8,128,1,float16,float16,0,0.14873759746551513
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,16,8,128,1,float16,fp8,0,0.062063997983932494
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,16,8,128,1,fp8,fp8,0,0.06272000074386597
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,16,16,128,1,float16,float16,0,0.123198401927948
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,16,16,128,1,float16,fp8,0,0.043198400735855104
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,16,16,128,1,fp8,fp8,0,0.04347679913043976
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,16,1,128,1,float16,float16,0,0.04940640032291412
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,16,1,128,1,float16,fp8,0,0.03915840089321136
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,16,1,128,1,fp8,fp8,0,0.03919680118560791
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,16,2,128,1,float16,float16,0,0.05398880243301392
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,16,2,128,1,float16,fp8,0,0.039134401082992556
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,16,2,128,1,fp8,fp8,0,0.039150398969650266
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,16,4,128,1,float16,float16,0,0.05979040265083313
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,16,16,128,1,float16,float16,0,0.05968800187110901
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,16,4,128,1,float16,fp8,0,0.03929280042648316
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,16,4,128,1,fp8,fp8,0,0.039078399538993835
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,16,8,128,1,float16,float16,0,0.07427039742469788
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,16,8,128,1,float16,fp8,0,0.03914079964160919
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,16,8,128,1,fp8,fp8,0,0.039182400703430174
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,16,16,128,1,float16,fp8,0,0.026950401067733765
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,16,16,128,1,fp8,fp8,0,0.0268528014421463
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,16,1,128,1,float16,float16,0,0.0351936012506485
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,16,1,128,1,float16,fp8,0,0.024748800694942473
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,16,1,128,1,fp8,fp8,0,0.024822400510311128
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,16,2,128,1,float16,float16,0,0.03517119884490967
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,16,8,128,1,float16,fp8,0,0.024868799746036528
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,16,2,128,1,float16,fp8,0,0.02484800070524216
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,16,2,128,1,fp8,fp8,0,0.024804799258708952
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,16,4,128,1,float16,float16,0,0.04039359986782074
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,16,4,128,1,float16,fp8,0,0.024825599789619446
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,16,4,128,1,fp8,fp8,0,0.024796800315380098
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,16,8,128,1,float16,float16,0,0.045956799387931825
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,16,1,128,1,float16,fp8,0,0.022838400304317476
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,16,8,128,1,fp8,fp8,0,0.024886399507522583
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,16,2,128,1,float16,float16,0,0.03307519853115082
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,16,16,128,1,float16,float16,0,0.04529759883880615
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,16,16,128,1,float16,fp8,0,0.024715200066566467
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,16,16,128,1,fp8,fp8,0,0.024702399969100952
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,16,1,128,1,float16,float16,0,0.033457601070404054
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,16,1,128,1,fp8,fp8,0,0.022884799540042876
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,16,2,128,1,float16,fp8,0,0.022859199345111846
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,16,2,128,1,fp8,fp8,0,0.022859199345111846
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,16,4,128,1,float16,float16,0,0.033537599444389346
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,16,4,128,1,float16,fp8,0,0.02274080067873001
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,16,4,128,1,fp8,fp8,0,0.02285760045051575
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,16,8,128,1,float16,float16,0,0.038646399974823
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,16,8,128,1,float16,fp8,0,0.022891199588775633
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,16,8,128,1,fp8,fp8,0,0.022683200240135194
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,16,1,128,1,float16,float16,0,0.6607247829437256
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,16,1,128,1,float16,fp8,0,0.5817520141601562
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,16,1,128,1,fp8,fp8,0,0.5801968097686767
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,16,2,128,1,float16,fp8,0,0.581217622756958
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,16,2,128,1,fp8,fp8,0,0.5820528030395508
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,16,2,128,1,float16,float16,0,0.8285807609558106
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,16,4,128,1,float16,fp8,0,0.5799263954162598
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,16,4,128,1,fp8,fp8,0,0.5810671806335449
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,16,4,128,1,float16,float16,0,1.1440239906311036
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,16,8,128,1,float16,fp8,0,0.5813248157501221
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,16,16,128,1,float16,fp8,0,0.3662559986114502
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,16,8,128,1,fp8,fp8,0,0.5821616172790527
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,16,1,128,1,float16,fp8,0,0.2964128017425537
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,16,16,128,1,fp8,fp8,0,0.3647536039352417
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,16,1,128,1,float16,float16,0,0.3440943956375122
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,16,1,128,1,fp8,fp8,0,0.29775519371032716
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,16,8,128,1,float16,float16,0,1.777676773071289
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,16,2,128,1,float16,fp8,0,0.29807679653167723
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,16,2,128,1,float16,float16,0,0.4236480236053467
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,16,2,128,1,fp8,fp8,0,0.2966111898422241
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,16,16,128,1,float16,float16,0,1.5261296272277831
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,16,8,128,1,float16,fp8,0,0.29802560806274414
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,16,4,128,1,float16,fp8,0,0.29732160568237304
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,16,4,128,1,float16,float16,0,0.5859471797943115
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,16,4,128,1,fp8,fp8,0,0.29767038822174074
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,16,8,128,1,fp8,fp8,0,0.29689760208129884
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,16,16,128,1,float16,fp8,0,0.19026880264282225
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,16,8,128,1,float16,float16,0,0.9001600265502929
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,16,16,128,1,fp8,fp8,0,0.19119679927825928
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,16,1,128,1,float16,float16,0,0.18755840063095092
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,16,16,128,1,float16,float16,0,0.7732416152954101
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,16,1,128,1,float16,fp8,0,0.15616960525512696
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,16,1,128,1,fp8,fp8,0,0.15624159574508667
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,16,2,128,1,float16,float16,0,0.22614240646362305
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,16,2,128,1,float16,fp8,0,0.15646719932556152
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,16,2,128,1,fp8,fp8,0,0.15601279735565185
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,16,8,128,1,float16,float16,0,0.46231999397277834
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,16,8,128,1,fp8,fp8,0,0.15615040063858032
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,16,4,128,1,float16,float16,0,0.3059024095535278
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,16,4,128,1,float16,fp8,0,0.15623680353164673
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,16,4,128,1,fp8,fp8,0,0.15603519678115846
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,16,8,128,1,float16,fp8,0,0.15675519704818724
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,16,16,128,1,float16,fp8,0,0.10284320116043091
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,16,16,128,1,fp8,fp8,0,0.10254240036010742
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,16,1,128,1,float16,float16,0,0.1091040015220642
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,16,2,128,1,fp8,fp8,0,0.0847823977470398
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,16,16,128,1,float16,float16,0,0.399783992767334
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,16,4,128,1,float16,fp8,0,0.08496959805488587
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,16,1,128,1,float16,fp8,0,0.08451679944992066
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,16,1,128,1,fp8,fp8,0,0.08447840213775634
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,16,2,128,1,float16,float16,0,0.12766239643096924
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,16,2,128,1,float16,fp8,0,0.08512639999389648
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,16,4,128,1,float16,float16,0,0.16598880290985107
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,16,4,128,1,fp8,fp8,0,0.08532320261001587
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,16,8,128,1,float16,float16,0,0.24434878826141357
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,16,8,128,1,float16,fp8,0,0.08539199829101562
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,16,8,128,1,fp8,fp8,0,0.08616639971733094
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,16,16,128,1,float16,float16,0,0.21131999492645265
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,16,16,128,1,float16,fp8,0,0.0576416015625
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,16,16,128,1,fp8,fp8,0,0.05851680040359497
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,16,1,128,1,float16,float16,0,0.06208639740943909
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,16,1,128,1,float16,fp8,0,0.04778720140457153
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,16,1,128,1,fp8,fp8,0,0.04774399995803833
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,16,2,128,1,float16,float16,0,0.06869120001792908
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,16,2,128,1,float16,fp8,0,0.0488207995891571
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,16,2,128,1,fp8,fp8,0,0.04753119945526123
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,16,4,128,1,float16,float16,0,0.08915519714355469
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,16,4,128,1,float16,fp8,0,0.048404800891876223
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,16,4,128,1,fp8,fp8,0,0.04822080135345459
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,16,8,128,1,float16,float16,0,0.13430559635162354
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,16,8,128,1,float16,fp8,0,0.04797120094299316
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,16,8,128,1,fp8,fp8,0,0.04827199876308441
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,16,16,128,1,float16,float16,0,0.11145440340042115
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,16,16,128,1,float16,fp8,0,0.03358879983425141
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,16,16,128,1,fp8,fp8,0,0.035036799311637876
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,16,1,128,1,float16,float16,0,0.039499199390411376
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,16,1,128,1,float16,fp8,0,0.02975200116634369
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,16,1,128,1,fp8,fp8,0,0.029032000899314882
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,16,2,128,1,float16,float16,0,0.044705599546432495
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,16,2,128,1,float16,fp8,0,0.02908959984779358
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,16,2,128,1,fp8,fp8,0,0.029868799448013305
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,16,4,128,1,float16,float16,0,0.05050879716873169
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,16,4,128,1,float16,fp8,0,0.030430400371551515
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,16,4,128,1,fp8,fp8,0,0.02913439869880676
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,16,8,128,1,float16,float16,0,0.06578239798545837
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,16,8,128,1,float16,fp8,0,0.02951520085334778
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,16,8,128,1,fp8,fp8,0,0.02945919930934906
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,16,16,128,1,float16,float16,0,0.05375199913978577
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,16,16,128,1,float16,fp8,0,0.02072799950838089
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,16,16,128,1,fp8,fp8,0,0.020694400370121
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,16,1,128,1,float16,float16,0,0.028960001468658448
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,16,1,128,1,float16,fp8,0,0.01863359957933426
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,16,1,128,1,fp8,fp8,0,0.018580800294876097
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,16,2,128,1,float16,float16,0,0.029054400324821473
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,16,2,128,1,float16,fp8,0,0.018739199638366698
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,16,2,128,1,fp8,fp8,0,0.018692800402641298
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,16,4,128,1,float16,float16,0,0.03455680012702942
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,16,4,128,1,float16,fp8,0,0.018700799345970152
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,16,4,128,1,fp8,fp8,0,0.018775999546051025
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,16,8,128,1,float16,float16,0,0.040094399452209474
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,16,8,128,1,float16,fp8,0,0.01879200041294098
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,16,8,128,1,fp8,fp8,0,0.018848000466823576
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,16,16,128,1,float16,float16,0,0.03914560079574585
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,16,16,128,1,float16,fp8,0,0.018695999681949616
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,16,16,128,1,fp8,fp8,0,0.018611200153827667
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,16,1,128,1,float16,float16,0,0.026902401447296144
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,16,1,128,1,float16,fp8,0,0.016702400147914888
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,16,1,128,1,fp8,fp8,0,0.016675199568271636
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,16,2,128,1,float16,float16,0,0.026878398656845093
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,16,8,128,1,float16,float16,0,0.033024001121521
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,16,2,128,1,float16,fp8,0,0.016628800332546233
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,16,2,128,1,fp8,fp8,0,0.016729600727558136
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,16,4,128,1,float16,float16,0,0.026995199918746948
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,16,4,128,1,float16,fp8,0,0.01666879951953888
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,16,4,128,1,fp8,fp8,0,0.016740800440311433
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,16,8,128,1,float16,fp8,0,0.016571199893951415
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,16,8,128,1,fp8,fp8,0,0.01660960018634796
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,16,16,128,1,float16,float16,0,0.03131999969482422
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,16,16,128,1,float16,fp8,0,0.016548800468444824
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,16,16,128,1,fp8,fp8,0,0.01656160056591034
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,16,2,128,1,fp8,fp8,0,0.016516800224781036
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,16,1,128,1,float16,float16,0,0.02698560059070587
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,16,1,128,1,float16,fp8,0,0.016599999368190767
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,16,1,128,1,fp8,fp8,0,0.016543999314308167
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,16,8,128,1,float16,fp8,0,0.016780799627304076
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,16,2,128,1,float16,float16,0,0.026881599426269533
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,16,2,128,1,float16,fp8,0,0.016603200137615202
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,16,4,128,1,float16,float16,0,0.027195200324058533
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,16,4,128,1,float16,fp8,0,0.0165583997964859
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,16,4,128,1,fp8,fp8,0,0.016550399363040924
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,16,8,128,1,float16,float16,0,0.028811201453208923
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,16,8,128,1,fp8,fp8,0,0.01655679941177368
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,16,1,128,1,float16,float16,0,0.2882976055145264
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,16,1,128,1,float16,fp8,0,0.2468127965927124
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,16,1,128,1,fp8,fp8,0,0.2459104061126709
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,16,2,128,1,float16,float16,0,0.3663952112197876
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,16,2,128,1,float16,fp8,0,0.24597439765930176
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,16,2,128,1,fp8,fp8,0,0.24632799625396729
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,16,4,128,1,float16,fp8,0,0.2462239980697632
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,16,4,128,1,fp8,fp8,0,0.24582080841064452
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,16,4,128,1,float16,float16,0,0.5248400211334229
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,16,8,128,1,float16,fp8,0,0.24568159580230714
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,16,8,128,1,fp8,fp8,0,0.24592480659484864
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,16,8,128,1,float16,float16,0,0.8411248207092286
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,16,16,128,1,float16,fp8,0,0.16228959560394288
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,16,16,128,1,fp8,fp8,0,0.16243040561676025
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,16,1,128,1,float16,float16,0,0.1570672035217285
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,16,16,128,1,float16,float16,0,0.7410175800323486
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,16,1,128,1,float16,fp8,0,0.1275264024734497
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,16,1,128,1,fp8,fp8,0,0.12747679948806762
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,16,2,128,1,float16,float16,0,0.19534560441970825
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,16,2,128,1,float16,fp8,0,0.12841759920120238
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,16,2,128,1,fp8,fp8,0,0.12837599515914916
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,16,8,128,1,fp8,fp8,0,0.12787840366363526
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,16,4,128,1,float16,float16,0,0.273305606842041
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,16,4,128,1,float16,fp8,0,0.12800159454345703
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,16,4,128,1,fp8,fp8,0,0.12863839864730836
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,16,8,128,1,float16,fp8,0,0.12798559665679932
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,16,8,128,1,float16,float16,0,0.4300191879272461
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,16,16,128,1,float16,fp8,0,0.08628000020980835
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,16,16,128,1,fp8,fp8,0,0.08635519742965699
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,16,16,128,1,float16,float16,0,0.38138399124145506
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,16,1,128,1,float16,float16,0,0.0904591977596283
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,16,4,128,1,float16,float16,0,0.14870879650115967
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,16,1,128,1,float16,fp8,0,0.06781920194625854
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,16,4,128,1,fp8,fp8,0,0.06915199756622314
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,16,1,128,1,fp8,fp8,0,0.06807519793510437
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,16,2,128,1,float16,float16,0,0.11016319990158081
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,16,2,128,1,float16,fp8,0,0.06841760277748107
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,16,2,128,1,fp8,fp8,0,0.06840000152587891
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,16,4,128,1,float16,fp8,0,0.06961119771003724
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,16,8,128,1,float16,float16,0,0.22547519207000732
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,16,8,128,1,float16,fp8,0,0.06995840072631836
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,16,8,128,1,fp8,fp8,0,0.06981760263442993
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,16,16,128,1,float16,float16,0,0.2029711961746216
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,16,16,128,1,float16,fp8,0,0.05009120106697083
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,16,16,128,1,fp8,fp8,0,0.05106880068778992
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,16,1,128,1,float16,float16,0,0.05295360088348389
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,16,1,128,1,float16,fp8,0,0.040515199303627014
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,16,1,128,1,fp8,fp8,0,0.03957279920578003
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,16,2,128,1,float16,float16,0,0.060180801153182986
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,16,2,128,1,float16,fp8,0,0.039822399616241455
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,16,8,128,1,float16,float16,0,0.12566879987716675
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,16,2,128,1,fp8,fp8,0,0.03935999870300293
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,16,4,128,1,float16,float16,0,0.08211519718170165
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,16,4,128,1,float16,fp8,0,0.04071199893951416
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,16,4,128,1,fp8,fp8,0,0.04108479917049408
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,16,8,128,1,float16,fp8,0,0.040785598754882815
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,16,8,128,1,fp8,fp8,0,0.04106720089912415
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,16,16,128,1,float16,float16,0,0.10457439422607422
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,16,16,128,1,float16,fp8,0,0.02884640097618103
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,16,2,128,1,float16,fp8,0,0.02476000040769577
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,16,16,128,1,fp8,fp8,0,0.028839999437332155
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,16,4,128,1,float16,float16,0,0.044200000166893
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,16,1,128,1,float16,float16,0,0.03297280073165894
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,16,4,128,1,fp8,fp8,0,0.023662400245666505
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,16,1,128,1,float16,fp8,0,0.024779200553894043
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,16,8,128,1,float16,fp8,0,0.024153600633144378
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,16,8,128,1,float16,float16,0,0.058222401142120364
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,16,1,128,1,fp8,fp8,0,0.0248416006565094
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,16,2,128,1,float16,float16,0,0.0376336008310318
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,16,2,128,1,fp8,fp8,0,0.024736000597476958
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,16,4,128,1,float16,fp8,0,0.023683199286460878
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,16,8,128,1,fp8,fp8,0,0.02476799935102463
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,16,16,128,1,float16,float16,0,0.05082719922065735
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,16,16,128,1,float16,fp8,0,0.01857600063085556
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,16,16,128,1,fp8,fp8,0,0.018699200451374055
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,16,4,128,1,float16,float16,0,0.030976000428199767
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,16,1,128,1,float16,float16,0,0.024830399453639983
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,16,1,128,1,float16,fp8,0,0.016681599617004394
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,16,1,128,1,fp8,fp8,0,0.01643040031194687
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,16,2,128,1,float16,float16,0,0.026824000477790832
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,16,2,128,1,float16,fp8,0,0.016492800414562227
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,16,2,128,1,fp8,fp8,0,0.016654400527477263
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,16,4,128,1,float16,fp8,0,0.01659359931945801
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,16,4,128,1,fp8,fp8,0,0.01652639955282211
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,16,8,128,1,float16,float16,0,0.03712959885597229
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,16,8,128,1,float16,fp8,0,0.016575999557971954
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,16,8,128,1,fp8,fp8,0,0.01663520038127899
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,16,16,128,1,float16,float16,0,0.03503359854221344
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,16,16,128,1,float16,fp8,0,0.014686399698257446
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,16,16,128,1,fp8,fp8,0,0.016017599403858183
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,16,4,128,1,float16,float16,0,0.02471359968185425
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,16,1,128,1,float16,float16,0,0.024689599871635437
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,16,4,128,1,float16,fp8,0,0.014494399726390838
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,16,1,128,1,float16,fp8,0,0.014590400457382201
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,16,8,128,1,float16,float16,0,0.028921601176261903
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,16,8,128,1,float16,fp8,0,0.014486399292945863
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,16,1,128,1,fp8,fp8,0,0.014591999351978302
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,16,16,128,1,float16,fp8,0,0.01456640064716339
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,16,2,128,1,float16,float16,0,0.02489600032567978
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,16,2,128,1,float16,fp8,0,0.014601600170135499
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,16,2,128,1,fp8,fp8,0,0.01451680064201355
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,16,1,128,1,fp8,fp8,0,0.013391999900341034
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,16,4,128,1,fp8,fp8,0,0.014473600685596466
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,16,8,128,1,fp8,fp8,0,0.014572800695896148
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,16,16,128,1,float16,float16,0,0.028910401463508605
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,16,16,128,1,fp8,fp8,0,0.014511999487876893
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,16,1,128,1,float16,float16,0,0.02489279955625534
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,16,1,128,1,float16,fp8,0,0.012995199859142303
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,16,2,128,1,float16,float16,0,0.0233024001121521
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,16,2,128,1,float16,fp8,0,0.013238400220870972
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,16,2,128,1,fp8,fp8,0,0.013798399269580841
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,16,4,128,1,float16,float16,0,0.024716800451278685
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,16,4,128,1,float16,fp8,0,0.01268800050020218
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,16,4,128,1,fp8,fp8,0,0.012628799676895142
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,16,8,128,1,float16,float16,0,0.024697600305080412
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,16,8,128,1,float16,fp8,0,0.012580800056457519
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,16,8,128,1,fp8,fp8,0,0.012561599910259246
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,16,16,128,1,float16,float16,0,0.02301120012998581
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,16,16,128,1,float16,fp8,0,0.01324480026960373
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,16,16,128,1,fp8,fp8,0,0.013566400110721587
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,16,1,128,1,float16,float16,0,0.022841599583625794
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,16,1,128,1,float16,fp8,0,0.01260959953069687
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,16,1,128,1,fp8,fp8,0,0.012569600343704223
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,16,2,128,1,float16,float16,0,0.022708800435066224
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,16,2,128,1,float16,fp8,0,0.01255040019750595
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,16,2,128,1,fp8,fp8,0,0.01263200044631958
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,16,4,128,1,float16,float16,0,0.022726400196552275
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,16,4,128,1,float16,fp8,0,0.012457600235939026
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,16,4,128,1,fp8,fp8,0,0.012532800436019897
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,16,8,128,1,float16,float16,0,0.022761599719524385
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,16,8,128,1,float16,fp8,0,0.01255200058221817
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,16,8,128,1,fp8,fp8,0,0.012540799379348756
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,16,1,128,1,float16,float16,0,0.18240799903869628
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,16,1,128,1,float16,fp8,0,0.15013920068740844
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,16,1,128,1,fp8,fp8,0,0.14998079538345338
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,16,2,128,1,float16,float16,0,0.2203439950942993
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,16,2,128,1,float16,fp8,0,0.15023200511932372
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,16,8,128,1,float16,fp8,0,0.15002880096435547
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,16,2,128,1,fp8,fp8,0,0.14971040487289428
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,16,4,128,1,float16,fp8,0,0.1501296043395996
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,16,4,128,1,float16,float16,0,0.29691998958587645
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,16,4,128,1,fp8,fp8,0,0.15024160146713256
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,16,8,128,1,float16,float16,0,0.4528639793395996
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,16,8,128,1,fp8,fp8,0,0.15004160404205322
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,16,16,128,1,float16,fp8,0,0.0964896023273468
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,16,16,128,1,float16,float16,0,0.39167520999908445
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,16,16,128,1,fp8,fp8,0,0.09692800045013428
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,16,1,128,1,float16,float16,0,0.1037824034690857
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,16,1,128,1,float16,fp8,0,0.0795199990272522
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,16,1,128,1,fp8,fp8,0,0.08001279830932617
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,16,2,128,1,float16,float16,0,0.12304480075836181
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,16,2,128,1,float16,fp8,0,0.07999359965324401
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,16,2,128,1,fp8,fp8,0,0.0800383985042572
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,16,4,128,1,float16,float16,0,0.16024800539016723
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,16,4,128,1,float16,fp8,0,0.0801967978477478
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,16,4,128,1,fp8,fp8,0,0.08009600043296813
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,16,8,128,1,float16,fp8,0,0.0800704002380371
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,16,8,128,1,float16,float16,0,0.23815999031066895
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,16,8,128,1,fp8,fp8,0,0.08021119832992554
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,16,16,128,1,float16,float16,0,0.20716478824615478
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,16,16,128,1,float16,fp8,0,0.05345119833946228
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,16,16,128,1,fp8,fp8,0,0.053395199775695804
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,16,1,128,1,float16,float16,0,0.05948479771614075
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,16,1,128,1,float16,fp8,0,0.04323199987411499
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,16,1,128,1,fp8,fp8,0,0.0432671993970871
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,16,2,128,1,float16,float16,0,0.0666703999042511
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,16,2,128,1,float16,fp8,0,0.04319039881229401
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,16,2,128,1,fp8,fp8,0,0.04338560104370117
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,16,4,128,1,float16,float16,0,0.08925759792327881
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,16,4,128,1,float16,fp8,0,0.043222400546073916
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,16,4,128,1,fp8,fp8,0,0.04323360025882721
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,16,8,128,1,float16,float16,0,0.13005919456481935
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,16,8,128,1,float16,fp8,0,0.04321439862251282
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,16,8,128,1,fp8,fp8,0,0.04324800074100495
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,16,16,128,1,float16,fp8,0,0.030985599756240843
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,16,16,128,1,float16,float16,0,0.10925920009613037
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,16,16,128,1,fp8,fp8,0,0.03094879984855652
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,16,1,128,1,float16,float16,0,0.03699359893798828
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,16,1,128,1,float16,fp8,0,0.026895999908447266
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,16,1,128,1,fp8,fp8,0,0.026774400472640993
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,16,2,128,1,float16,float16,0,0.041254401206970215
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,16,2,128,1,float16,fp8,0,0.026873600482940675
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,16,2,128,1,fp8,fp8,0,0.0268640011548996
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,16,4,128,1,float16,float16,0,0.04733920097351074
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,16,4,128,1,float16,fp8,0,0.026825600862503053
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,16,4,128,1,fp8,fp8,0,0.02680320143699646
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,16,8,128,1,float16,float16,0,0.0631600022315979
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,16,8,128,1,float16,fp8,0,0.026830399036407472
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,16,8,128,1,fp8,fp8,0,0.026782399415969847
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,16,16,128,1,float16,float16,0,0.05143359899520874
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,16,16,128,1,float16,fp8,0,0.018699200451374055
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,16,16,128,1,fp8,fp8,0,0.018785600364208222
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,16,1,128,1,float16,float16,0,0.026867198944091796
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,16,1,128,1,float16,fp8,0,0.01671999990940094
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,16,1,128,1,fp8,fp8,0,0.016740800440311433
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,16,2,128,1,float16,float16,0,0.026881599426269533
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,16,2,128,1,float16,fp8,0,0.01661760061979294
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,16,2,128,1,fp8,fp8,0,0.01671359986066818
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,16,4,128,1,float16,float16,0,0.033107200264930726
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,16,4,128,1,float16,fp8,0,0.016620799899101257
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,16,4,128,1,fp8,fp8,0,0.016672000288963318
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,16,8,128,1,float16,float16,0,0.039059200882911684
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,16,8,128,1,float16,fp8,0,0.01666879951953888
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,16,8,128,1,fp8,fp8,0,0.016646400094032288
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,16,16,128,1,float16,float16,0,0.0337583988904953
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,16,2,128,1,float16,fp8,0,0.012476799637079239
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,16,16,128,1,float16,fp8,0,0.012548799812793731
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,16,16,128,1,fp8,fp8,0,0.012681600451469422
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,16,1,128,1,float16,float16,0,0.02276480048894882
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,16,1,128,1,float16,fp8,0,0.012492799758911132
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,16,1,128,1,fp8,fp8,0,0.012435200065374375
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,16,2,128,1,float16,float16,0,0.022729599475860597
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,16,2,128,1,fp8,fp8,0,0.012542399764060973
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,16,4,128,1,float16,float16,0,0.02269120067358017
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,16,4,128,1,float16,fp8,0,0.012468799948692322
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,16,4,128,1,fp8,fp8,0,0.012544000148773193
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,16,8,128,1,float16,float16,0,0.028696000576019287
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,16,8,128,1,float16,fp8,0,0.012484800070524216
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,16,8,128,1,fp8,fp8,0,0.012454400211572647
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,16,16,128,1,float16,float16,0,0.026919999718666078
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,16,2,128,1,float16,fp8,0,0.010428799688816071
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,16,16,128,1,float16,fp8,0,0.012439999729394913
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,16,16,128,1,fp8,fp8,0,0.01165440008044243
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,16,4,128,1,float16,fp8,0,0.010593599826097488
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,16,1,128,1,float16,float16,0,0.02263679951429367
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,16,4,128,1,fp8,fp8,0,0.010611200332641601
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,16,1,128,1,float16,fp8,0,0.010503999888896942
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,16,1,128,1,fp8,fp8,0,0.010462400317192078
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,16,2,128,1,float16,float16,0,0.020745599269866945
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,16,2,128,1,fp8,fp8,0,0.010452800244092942
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,16,4,128,1,float16,float16,0,0.021305599808692934
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,16,1,128,1,float16,float16,0,0.020708799362182617
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,16,8,128,1,float16,float16,0,0.022742399573326112
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,16,8,128,1,fp8,fp8,0,0.010633599758148194
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,16,8,128,1,float16,fp8,0,0.010460799932479859
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,16,16,128,1,float16,float16,0,0.020710399746894835
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,16,16,128,1,float16,fp8,0,0.010478399693965912
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,16,16,128,1,fp8,fp8,0,0.010473600029945374
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,16,4,128,1,float16,fp8,0,0.010452800244092942
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,16,1,128,1,float16,fp8,0,0.010409600287675857
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,16,1,128,1,fp8,fp8,0,0.010447999835014344
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,16,2,128,1,float16,float16,0,0.020628799498081208
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,16,2,128,1,float16,fp8,0,0.010524799674749374
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,16,2,128,1,fp8,fp8,0,0.010539200156927109
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,16,4,128,1,float16,float16,0,0.019633600115776063
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,16,16,128,1,float16,fp8,0,0.010523200035095215
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,16,16,128,1,fp8,fp8,0,0.010599999874830245
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,16,4,128,1,fp8,fp8,0,0.010409600287675857
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,16,8,128,1,float16,float16,0,0.019540800154209136
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,16,8,128,1,float16,fp8,0,0.010384000092744827
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,16,8,128,1,fp8,fp8,0,0.010460799932479859
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,16,16,128,1,float16,float16,0,0.02062560021877289
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,16,1,128,1,float16,float16,0,0.019190399348735808
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,16,1,128,1,float16,fp8,0,0.0105103999376297
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,16,1,128,1,fp8,fp8,0,0.010449600219726563
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,16,4,128,1,fp8,fp8,0,0.010391999781131745
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,16,2,128,1,float16,float16,0,0.018702399730682374
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,16,8,128,1,float16,fp8,0,0.010441599786281586
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,16,2,128,1,float16,fp8,0,0.01040479987859726
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,16,2,128,1,fp8,fp8,0,0.010486400127410889
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,16,4,128,1,float16,float16,0,0.01876160055398941
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,16,4,128,1,float16,fp8,0,0.010417599976062775
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,16,8,128,1,float16,float16,0,0.0186256006360054
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,16,8,128,1,fp8,fp8,0,0.010580799728631973
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,16,1,128,1,float16,float16,0,0.14466880559921264
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,16,1,128,1,float16,fp8,0,0.11656479835510254
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,16,1,128,1,fp8,fp8,0,0.11574399471282959
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,16,2,128,1,float16,float16,0,0.1646623969078064
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,16,2,128,1,float16,fp8,0,0.11611520051956177
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,16,2,128,1,fp8,fp8,0,0.11595040559768677
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,16,4,128,1,float16,float16,0,0.20319199562072754
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,16,8,128,1,fp8,fp8,0,0.11648319959640503
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,16,4,128,1,float16,fp8,0,0.11603360176086426
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,16,16,128,1,float16,float16,0,0.22696321010589598
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,16,4,128,1,fp8,fp8,0,0.11659679412841797
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,16,8,128,1,float16,float16,0,0.27998719215393064
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,16,8,128,1,float16,fp8,0,0.11626559495925903
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,16,16,128,1,float16,fp8,0,0.07212640047073364
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,16,16,128,1,fp8,fp8,0,0.07206400036811829
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,16,1,128,1,float16,float16,0,0.0802623987197876
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,16,1,128,1,float16,fp8,0,0.062105602025985716
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,16,1,128,1,fp8,fp8,0,0.06187199950218201
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,16,2,128,1,float16,float16,0,0.08909119963645935
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,16,2,128,1,float16,fp8,0,0.061831998825073245
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,16,2,128,1,fp8,fp8,0,0.06204320192337036
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,16,4,128,1,float16,float16,0,0.11303039789199829
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,16,4,128,1,float16,fp8,0,0.062219202518463135
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,16,4,128,1,fp8,fp8,0,0.06218240261077881
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,16,8,128,1,float16,float16,0,0.15198080539703368
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,16,8,128,1,float16,fp8,0,0.06217280030250549
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,16,8,128,1,fp8,fp8,0,0.06287040114402771
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,16,16,128,1,float16,float16,0,0.12308640480041504
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,16,16,128,1,float16,fp8,0,0.039208000898361205
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,16,16,128,1,fp8,fp8,0,0.039155200123786926
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,16,1,128,1,float16,float16,0,0.047367998957633974
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,16,1,128,1,float16,fp8,0,0.03505600094795227
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,16,1,128,1,fp8,fp8,0,0.0350816011428833
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,16,2,128,1,float16,float16,0,0.05156639814376831
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,16,2,128,1,float16,fp8,0,0.03510879874229431
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,16,2,128,1,fp8,fp8,0,0.03504000008106232
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,16,4,128,1,float16,float16,0,0.058531200885772704
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,16,4,128,1,float16,fp8,0,0.03512639999389648
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,16,4,128,1,fp8,fp8,0,0.035097599029541016
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,16,8,128,1,float16,float16,0,0.07411999702453613
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,16,8,128,1,float16,fp8,0,0.03504799902439117
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,16,8,128,1,fp8,fp8,0,0.03514559864997864
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,16,16,128,1,float16,float16,0,0.058192002773284915
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,16,16,128,1,float16,fp8,0,0.024728000164031982
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,16,16,128,1,fp8,fp8,0,0.024900799989700316
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,16,1,128,1,float16,float16,0,0.033022400736808774
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,16,1,128,1,float16,fp8,0,0.022814400494098663
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,16,1,128,1,fp8,fp8,0,0.02282879948616028
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,16,2,128,1,float16,float16,0,0.03313600122928619
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,16,2,128,1,float16,fp8,0,0.02258239984512329
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,16,2,128,1,fp8,fp8,0,0.02284799963235855
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,16,4,128,1,float16,float16,0,0.039156800508499144
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,16,4,128,1,float16,fp8,0,0.022801600396633148
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,16,4,128,1,fp8,fp8,0,0.022737599909305573
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,16,8,128,1,float16,float16,0,0.04522239863872528
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,16,8,128,1,float16,fp8,0,0.02271520048379898
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,16,8,128,1,fp8,fp8,0,0.022731199860572815
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,16,16,128,1,float16,float16,0,0.03710080087184906
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,16,16,128,1,float16,fp8,0,0.01641920059919357
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,16,16,128,1,fp8,fp8,0,0.01650879979133606
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,16,1,128,1,float16,float16,0,0.026900801062583923
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,16,1,128,1,float16,fp8,0,0.014681600034236908
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,16,1,128,1,fp8,fp8,0,0.014528000354766845
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,16,2,128,1,float16,float16,0,0.02680320143699646
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,16,2,128,1,float16,fp8,0,0.014697599411010741
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,16,8,128,1,fp8,fp8,0,0.014555199444293976
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,16,2,128,1,fp8,fp8,0,0.014480000734329224
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,16,4,128,1,float16,float16,0,0.026923200488090514
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,16,4,128,1,float16,fp8,0,0.014552000164985656
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,16,4,128,1,fp8,fp8,0,0.014696000516414643
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,16,8,128,1,float16,float16,0,0.031041601300239564
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,16,8,128,1,float16,fp8,0,0.014667199552059173
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,16,16,128,1,float16,float16,0,0.02686080038547516
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,16,16,128,1,float16,fp8,0,0.012041600048542022
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,16,16,128,1,fp8,fp8,0,0.010532800108194351
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,16,1,128,1,float16,float16,0,0.022444799542427063
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,16,1,128,1,float16,fp8,0,0.010407999902963639
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,16,1,128,1,fp8,fp8,0,0.010505600273609162
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,16,2,128,1,float16,float16,0,0.021305599808692934
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,16,2,128,1,float16,fp8,0,0.010465600341558457
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,16,2,128,1,fp8,fp8,0,0.010491199791431427
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,16,4,128,1,float16,float16,0,0.022726400196552275
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,16,16,128,1,float16,fp8,0,0.010571199655532836
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,16,4,128,1,float16,fp8,0,0.010452800244092942
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,16,4,128,1,fp8,fp8,0,0.010659199953079224
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,16,8,128,1,float16,float16,0,0.022804799675941467
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,16,8,128,1,float16,fp8,0,0.010524799674749374
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,16,8,128,1,fp8,fp8,0,0.01045759990811348
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,16,16,128,1,float16,float16,0,0.020718400180339814
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,16,16,128,1,fp8,fp8,0,0.01043040007352829
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,16,1,128,1,float16,float16,0,0.020601600408554077
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,16,1,128,1,float16,fp8,0,0.010742399841547012
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,16,1,128,1,fp8,fp8,0,0.010409600287675857
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,16,2,128,1,float16,float16,0,0.018695999681949616
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,16,2,128,1,float16,fp8,0,0.010406400263309478
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,16,2,128,1,fp8,fp8,0,0.010527999699115753
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,16,4,128,1,float16,float16,0,0.019619199633598327
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,16,4,128,1,float16,fp8,0,0.010507199913263321
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,16,4,128,1,fp8,fp8,0,0.010454399883747101
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,16,8,128,1,float16,float16,0,0.02062080055475235
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,16,8,128,1,float16,fp8,0,0.010971199721097946
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,16,8,128,1,fp8,fp8,0,0.010417599976062775
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,16,1,128,1,fp8,fp8,0,0.009516800194978714
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,16,16,128,1,float16,float16,0,0.0187376007437706
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,16,16,128,1,float16,fp8,0,0.010444799810647965
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,16,16,128,1,fp8,fp8,0,0.010382399708032609
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,16,1,128,1,float16,float16,0,0.018651199340820313
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,16,1,128,1,float16,fp8,0,0.010337600111961364
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,16,2,128,1,float16,fp8,0,0.009387200325727462
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,16,2,128,1,fp8,fp8,0,0.010513599961996078
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,16,2,128,1,float16,float16,0,0.018777599930763243
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,16,4,128,1,float16,float16,0,0.01871519982814789
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,16,4,128,1,float16,fp8,0,0.010409600287675857
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,16,4,128,1,fp8,fp8,0,0.010438399761915207
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,16,8,128,1,float16,float16,0,0.018743999302387238
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,16,8,128,1,float16,fp8,0,0.010416000336408614
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,16,16,128,1,float16,float16,0,0.018620799481868743
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,16,8,128,1,fp8,fp8,0,0.010393600165843963
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,16,16,128,1,float16,fp8,0,0.01043199971318245
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,16,2,128,1,float16,fp8,0,0.010424000024795533
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,16,16,128,1,fp8,fp8,0,0.010398399829864503
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,16,1,128,1,float16,float16,0,0.01874080002307892
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,16,1,128,1,float16,fp8,0,0.010412800312042236
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,16,1,128,1,fp8,fp8,0,0.010465600341558457
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,16,2,128,1,float16,float16,0,0.018966400623321535
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,16,2,128,1,fp8,fp8,0,0.010443200170993806
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,16,4,128,1,float16,float16,0,0.018799999356269838
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,16,4,128,1,float16,fp8,0,0.010361599922180175
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,16,8,128,1,float16,float16,0,0.018646399676799773
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,16,4,128,1,fp8,fp8,0,0.01043360009789467
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,16,8,128,1,float16,fp8,0,0.010375999659299851
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,16,8,128,1,fp8,fp8,0,0.01040000021457672
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,16,1,128,1,float16,float16,0,0.13027679920196533
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,16,1,128,1,float16,fp8,0,0.09948959946632385
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,16,1,128,1,fp8,fp8,0,0.09915680289268494
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,16,2,128,1,float16,float16,0,0.13494080305099487
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,16,2,128,1,float16,fp8,0,0.09962559938430786
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,16,2,128,1,fp8,fp8,0,0.09911360144615174
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,16,4,128,1,float16,float16,0,0.15810240507125856
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,16,4,128,1,float16,fp8,0,0.10040960311889649
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,16,4,128,1,fp8,fp8,0,0.09949600100517272
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,16,8,128,1,float16,float16,0,0.19810240268707274
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,16,8,128,1,float16,fp8,0,0.09966719746589661
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,16,8,128,1,fp8,fp8,0,0.099891197681427
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,16,16,128,1,float16,float16,0,0.14649759531021117
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,16,16,128,1,float16,fp8,0,0.05798720121383667
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,16,16,128,1,fp8,fp8,0,0.05841919779777527
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,16,1,128,1,float16,float16,0,0.07217280268669128
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,16,1,128,1,float16,fp8,0,0.05410879850387573
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,16,1,128,1,fp8,fp8,0,0.053932797908782956
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,16,2,128,1,float16,float16,0,0.07617599964141845
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,16,2,128,1,float16,fp8,0,0.05387679934501648
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,16,2,128,1,fp8,fp8,0,0.05386719703674316
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,16,4,128,1,float16,float16,0,0.08053920269012452
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,16,4,128,1,float16,fp8,0,0.053895998001098636
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,16,4,128,1,fp8,fp8,0,0.05505920052528381
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,16,8,128,1,float16,float16,0,0.09885920286178589
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,16,8,128,1,float16,fp8,0,0.0540880024433136
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,16,1,128,1,fp8,fp8,0,0.03145439922809601
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,16,8,128,1,fp8,fp8,0,0.0547327995300293
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,16,16,128,1,float16,float16,0,0.06976799964904785
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,16,16,128,1,float16,fp8,0,0.03311200141906738
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,16,16,128,1,fp8,fp8,0,0.03311359882354736
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,16,1,128,1,float16,float16,0,0.04529919922351837
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,16,1,128,1,float16,fp8,0,0.03100000023841858
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,16,2,128,1,float16,float16,0,0.04524799883365631
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,16,8,128,1,float16,fp8,0,0.031097599864006044
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,16,2,128,1,float16,fp8,0,0.031302401423454286
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,16,2,128,1,fp8,fp8,0,0.03182399868965149
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,16,4,128,1,float16,float16,0,0.04969919919967651
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,16,4,128,1,float16,fp8,0,0.030956798791885377
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,16,4,128,1,fp8,fp8,0,0.030904000997543334
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,16,8,128,1,float16,float16,0,0.05569599866867066
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,16,8,128,1,fp8,fp8,0,0.031171199679374696
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,16,16,128,1,float16,float16,0,0.04337440133094787
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,16,16,128,1,float16,fp8,0,0.021673600375652313
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,16,16,128,1,fp8,fp8,0,0.02074880003929138
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,16,1,128,1,float16,float16,0,0.03286080062389374
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,16,1,128,1,float16,fp8,0,0.02067999988794327
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,16,1,128,1,fp8,fp8,0,0.02083519995212555
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,16,2,128,1,float16,float16,0,0.03290559947490692
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,16,2,128,1,float16,fp8,0,0.020641599595546723
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,16,2,128,1,fp8,fp8,0,0.02075680047273636
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,16,4,128,1,float16,float16,0,0.03300800025463104
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,16,4,128,1,float16,fp8,0,0.020695999264717102
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,16,4,128,1,fp8,fp8,0,0.020665599405765532
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,16,8,128,1,float16,float16,0,0.037248000502586365
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,16,8,128,1,float16,fp8,0,0.0206496000289917
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,16,8,128,1,fp8,fp8,0,0.020713600516319274
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,16,16,128,1,float16,float16,0,0.03100000023841858
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,16,16,128,1,float16,fp8,0,0.014470399916172027
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,16,16,128,1,fp8,fp8,0,0.014534400403499603
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,16,1,128,1,float16,float16,0,0.024798400700092316
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,16,1,128,1,float16,fp8,0,0.014591999351978302
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,16,1,128,1,fp8,fp8,0,0.01448799967765808
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,16,2,128,1,float16,float16,0,0.025409600138664244
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,16,2,128,1,float16,fp8,0,0.014511999487876893
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,16,2,128,1,fp8,fp8,0,0.01449120044708252
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,16,4,128,1,float16,float16,0,0.0248416006565094
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,16,4,128,1,float16,fp8,0,0.01462559998035431
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,16,4,128,1,fp8,fp8,0,0.01454080045223236
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,16,8,128,1,float16,float16,0,0.02694239914417267
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,16,8,128,1,float16,fp8,0,0.014483200013637542
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,16,8,128,1,fp8,fp8,0,0.014484800398349762
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,16,16,128,1,float16,float16,0,0.020713600516319274
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,16,16,128,1,float16,fp8,0,0.010492800176143647
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,16,16,128,1,fp8,fp8,0,0.010527999699115753
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,16,1,128,1,float16,float16,0,0.020721599459648132
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,16,1,128,1,float16,fp8,0,0.010654400289058685
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,16,1,128,1,fp8,fp8,0,0.010406400263309478
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,16,2,128,1,float16,fp8,0,0.010526400059461594
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,16,2,128,1,float16,float16,0,0.02067520022392273
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,16,2,128,1,fp8,fp8,0,0.010444799810647965
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,16,4,128,1,float16,float16,0,0.020742399990558623
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,16,4,128,1,float16,fp8,0,0.010473600029945374
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,16,4,128,1,fp8,fp8,0,0.010502400249242783
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,16,8,128,1,float16,float16,0,0.020735999941825865
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,16,8,128,1,float16,fp8,0,0.010396800190210342
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,16,1,128,1,float16,fp8,0,0.01040479987859726
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,16,8,128,1,fp8,fp8,0,0.010497599840164185
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,16,16,128,1,float16,float16,0,0.01876319944858551
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,16,16,128,1,float16,fp8,0,0.010361599922180175
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,16,16,128,1,fp8,fp8,0,0.010470400005578995
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,16,1,128,1,float16,float16,0,0.018700799345970152
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,16,2,128,1,float16,float16,0,0.018688000738620758
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,16,2,128,1,float16,fp8,0,0.010164800286293029
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,16,1,128,1,fp8,fp8,0,0.010427200049161912
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,16,2,128,1,fp8,fp8,0,0.01000479981303215
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,16,4,128,1,float16,float16,0,0.018756799399852753
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,16,4,128,1,float16,fp8,0,0.010556799918413162
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,16,4,128,1,fp8,fp8,0,0.010489600151777268
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,16,8,128,1,float16,float16,0,0.018700799345970152
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,16,8,128,1,float16,fp8,0,0.010396800190210342
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,16,8,128,1,fp8,fp8,0,0.010420800000429154
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,16,16,128,1,float16,float16,0,0.018676799535751343
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,16,16,128,1,float16,fp8,0,0.010316800326108932
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,16,1,128,1,float16,float16,0,0.018743999302387238
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,16,16,128,1,fp8,fp8,0,0.010302399843931198
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,16,1,128,1,float16,fp8,0,0.00904799997806549
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,16,1,128,1,fp8,fp8,0,0.010380800068378448
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,16,2,128,1,float16,float16,0,0.018587200343608855
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,16,2,128,1,float16,fp8,0,0.00958079993724823
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,16,2,128,1,fp8,fp8,0,0.008420799672603608
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,16,4,128,1,float16,float16,0,0.01860000044107437
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,16,4,128,1,float16,fp8,0,0.009497600048780442
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,16,4,128,1,fp8,fp8,0,0.009808000177145004
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,16,8,128,1,float16,float16,0,0.018667200207710268
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,16,8,128,1,float16,fp8,0,0.010377600044012069
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,16,8,128,1,fp8,fp8,0,0.010351999849081039
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,16,16,128,1,float16,float16,0,0.018587200343608855
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,16,16,128,1,float16,fp8,0,0.008524800091981888
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,16,16,128,1,fp8,fp8,0,0.009403199702501298
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,16,1,128,1,float16,float16,0,0.018769599497318268
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,16,4,128,1,float16,fp8,0,0.008585599809885025
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,16,1,128,1,float16,fp8,0,0.008508799970149994
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,16,1,128,1,fp8,fp8,0,0.009929600358009338
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,16,2,128,1,float16,float16,0,0.018572799861431122
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,16,2,128,1,float16,fp8,0,0.008568000048398972
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,16,2,128,1,fp8,fp8,0,0.008849599957466125
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,16,4,128,1,float16,float16,0,0.018580800294876097
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,16,4,128,1,fp8,fp8,0,0.009564799815416336
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,16,8,128,1,float16,float16,0,0.018619200587272643
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,16,8,128,1,float16,fp8,0,0.008423999696969987
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,16,8,128,1,fp8,fp8,0,0.010214400291442872
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,16,1,128,1,float16,float16,0,0.11780480146408082
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,16,1,128,1,float16,fp8,0,0.0927727997303009
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,16,1,128,1,fp8,fp8,0,0.09406399726867676
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,16,2,128,1,float16,float16,0,0.12370400428771973
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,16,8,128,1,float16,float16,0,0.14695520401000978
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,16,2,128,1,float16,fp8,0,0.09282240271568298
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,16,2,128,1,fp8,fp8,0,0.09296320080757141
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,16,4,128,1,float16,float16,0,0.12952959537506104
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,16,4,128,1,float16,fp8,0,0.09304320216178893
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,16,4,128,1,fp8,fp8,0,0.09282240271568298
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,16,8,128,1,float16,fp8,0,0.09365280270576477
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,16,8,128,1,fp8,fp8,0,0.09303680062294006
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,16,16,128,1,float16,float16,0,0.09525600075721741
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,16,2,128,1,float16,float16,0,0.07092639803886414
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,16,16,128,1,float16,fp8,0,0.053544002771377566
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,16,16,128,1,fp8,fp8,0,0.05363839864730835
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,16,1,128,1,float16,float16,0,0.07036319971084595
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,16,1,128,1,float16,fp8,0,0.051585602760314944
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,16,1,128,1,fp8,fp8,0,0.05141440033912659
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,16,2,128,1,float16,fp8,0,0.05173919796943664
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,16,2,128,1,fp8,fp8,0,0.05146719813346863
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,16,4,128,1,float16,float16,0,0.07592160105705262
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,16,4,128,1,float16,fp8,0,0.051446402072906496
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,16,4,128,1,fp8,fp8,0,0.05152959823608398
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,16,8,128,1,float16,float16,0,0.08123679757118225
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,16,8,128,1,float16,fp8,0,0.05176479816436767
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,16,8,128,1,fp8,fp8,0,0.051734399795532224
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,16,16,128,1,float16,float16,0,0.05599679946899414
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,16,16,128,1,float16,fp8,0,0.031249600648880004
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,16,16,128,1,fp8,fp8,0,0.030985599756240843
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,16,1,128,1,float16,float16,0,0.04521760046482086
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,16,1,128,1,float16,fp8,0,0.030847999453544616
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,16,1,128,1,fp8,fp8,0,0.03062880039215088
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,16,2,128,1,float16,float16,0,0.045307201147079465
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,16,2,128,1,float16,fp8,0,0.030769601464271545
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,16,2,128,1,fp8,fp8,0,0.03091199994087219
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,16,4,128,1,float16,float16,0,0.04521439969539642
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,16,4,128,1,float16,fp8,0,0.030904000997543334
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,16,4,128,1,fp8,fp8,0,0.030876800417900085
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,16,8,128,1,float16,float16,0,0.04978879988193512
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,16,8,128,1,float16,fp8,0,0.031001600623130798
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,16,8,128,1,fp8,fp8,0,0.03091680109500885
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,16,16,128,1,float16,float16,0,0.0372655987739563
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,16,16,128,1,float16,fp8,0,0.02075680047273636
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,16,16,128,1,fp8,fp8,0,0.020683200657367708
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,16,1,128,1,float16,float16,0,0.03221920132637024
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,16,1,128,1,float16,fp8,0,0.02045599967241287
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,16,1,128,1,fp8,fp8,0,0.01940000057220459
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,16,2,128,1,float16,float16,0,0.032779198884963986
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,16,2,128,1,float16,fp8,0,0.02053920030593872
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,16,2,128,1,fp8,fp8,0,0.020304000377655028
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,16,4,128,1,float16,float16,0,0.03126400113105774
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,16,4,128,1,float16,fp8,0,0.020414400100708007
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,16,4,128,1,fp8,fp8,0,0.02022559940814972
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,16,8,128,1,float16,float16,0,0.03299840092658997
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,16,8,128,1,float16,fp8,0,0.01911199986934662
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,16,8,128,1,fp8,fp8,0,0.019811199605464937
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,16,16,128,1,float16,float16,0,0.024726399779319765
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,16,16,128,1,float16,fp8,0,0.013414399325847625
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,16,16,128,1,fp8,fp8,0,0.014476799964904785
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,16,1,128,1,float16,float16,0,0.023940800130367278
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,16,1,128,1,float16,fp8,0,0.013356800377368926
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,16,1,128,1,fp8,fp8,0,0.012647999823093415
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,16,2,128,1,float16,float16,0,0.024697600305080412
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,16,2,128,1,fp8,fp8,0,0.012916800379753113
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,16,2,128,1,float16,fp8,0,0.012608000636100769
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,16,4,128,1,float16,float16,0,0.023416000604629516
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,16,4,128,1,float16,fp8,0,0.013766400516033173
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,16,4,128,1,fp8,fp8,0,0.01266240030527115
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,16,8,128,1,float16,float16,0,0.024716800451278685
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,16,8,128,1,float16,fp8,0,0.012539200484752655
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,16,8,128,1,fp8,fp8,0,0.0126351997256279
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,16,16,128,1,float16,float16,0,0.020771199464797975
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,16,16,128,1,float16,fp8,0,0.010553599894046783
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,16,16,128,1,fp8,fp8,0,0.010516799986362457
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,16,1,128,1,float16,float16,0,0.02072319984436035
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,16,1,128,1,float16,fp8,0,0.010420800000429154
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,16,1,128,1,fp8,fp8,0,0.010427200049161912
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,16,2,128,1,float16,float16,0,0.019092799723148347
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,16,2,128,1,float16,fp8,0,0.010351999849081039
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,16,2,128,1,fp8,fp8,0,0.010363200306892395
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,16,4,128,1,float16,float16,0,0.018796800076961516
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,16,4,128,1,float16,fp8,0,0.010527999699115753
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,16,4,128,1,fp8,fp8,0,0.010503999888896942
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,16,8,128,1,float16,float16,0,0.019849599897861482
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,16,8,128,1,fp8,fp8,0,0.010518400371074677
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,16,8,128,1,float16,fp8,0,0.010375999659299851
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,16,16,128,1,float16,float16,0,0.01876640021800995
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,16,16,128,1,float16,fp8,0,0.010424000024795533
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,16,16,128,1,fp8,fp8,0,0.010422399640083313
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,16,1,128,1,float16,float16,0,0.018590399622917177
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,16,1,128,1,float16,fp8,0,0.010447999835014344
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,16,1,128,1,fp8,fp8,0,0.010385599732398988
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,16,2,128,1,float16,float16,0,0.018617600202560425
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,16,2,128,1,float16,fp8,0,0.009017600119113922
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,16,2,128,1,fp8,fp8,0,0.010385599732398988
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,16,4,128,1,float16,float16,0,0.018743999302387238
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,16,4,128,1,float16,fp8,0,0.010412800312042236
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,16,4,128,1,fp8,fp8,0,0.010412800312042236
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,16,16,128,1,fp8,fp8,0,0.009380800276994705
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,16,8,128,1,float16,fp8,0,0.010396800190210342
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,16,8,128,1,float16,float16,0,0.01879200041294098
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,16,8,128,1,fp8,fp8,0,0.01053759977221489
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,16,16,128,1,float16,float16,0,0.01871200054883957
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,16,16,128,1,float16,fp8,0,0.010438399761915207
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,16,1,128,1,float16,float16,0,0.018572799861431122
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,16,1,128,1,float16,fp8,0,0.008476799726486206
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,16,1,128,1,fp8,fp8,0,0.00844319984316826
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,16,2,128,1,float16,float16,0,0.018619200587272643
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,16,2,128,1,float16,fp8,0,0.008558399975299835
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,16,2,128,1,fp8,fp8,0,0.01048479974269867
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,16,4,128,1,float16,float16,0,0.018636800348758698
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,16,4,128,1,float16,fp8,0,0.008951999992132188
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,16,4,128,1,fp8,fp8,0,0.008953599631786347
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,16,8,128,1,float16,float16,0,0.018676799535751343
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,16,8,128,1,float16,fp8,0,0.009046400338411332
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,16,8,128,1,fp8,fp8,0,0.009064000099897385
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,16,16,128,1,float16,float16,0,0.018643200397491455
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,16,16,128,1,float16,fp8,0,0.009854400157928466
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,16,16,128,1,fp8,fp8,0,0.008628799766302108
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,16,1,128,1,float16,fp8,0,0.008462399989366532
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,16,1,128,1,float16,float16,0,0.01865759938955307
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,16,1,128,1,fp8,fp8,0,0.008481600135564805
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,16,2,128,1,float16,float16,0,0.018624000251293182
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,16,2,128,1,float16,fp8,0,0.008460800349712371
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,16,2,128,1,fp8,fp8,0,0.009353599697351455
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,16,4,128,1,float16,float16,0,0.0186271995306015
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,16,4,128,1,float16,fp8,0,0.008379200100898742
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,16,4,128,1,fp8,fp8,0,0.009166400134563445
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,16,8,128,1,float16,float16,0,0.01874080002307892
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,16,8,128,1,float16,fp8,0,0.009571199864149093
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,16,8,128,1,fp8,fp8,0,0.008579199761152267
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,12,1,128,1,float16,fp8,0,4.318632125854492
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,12,1,128,1,fp8,fp8,0,4.330651092529297
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,12,2,128,1,float16,fp8,0,4.369956970214844
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,12,2,128,1,fp8,fp8,0,4.376587295532227
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,12,1,128,1,float16,float16,0,5.401470565795899
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,12,2,128,1,float16,float16,0,5.571761703491211
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,12,4,128,1,float16,float16,0,5.8969982147216795
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,12,1,128,1,float16,float16,0,2.6981664657592774
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,12,12,128,1,float16,float16,0,3.5434848785400392
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,12,1,128,1,float16,fp8,0,2.6882112503051756
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,12,4,128,1,float16,fp8,0,4.389884948730469
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,12,4,128,1,fp8,fp8,0,4.409121704101563
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,12,1,128,1,fp8,fp8,0,2.6308128356933596
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,12,12,128,1,float16,fp8,0,2.2932527542114256
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,12,12,128,1,fp8,fp8,0,2.351697540283203
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,12,2,128,1,float16,fp8,0,2.2853008270263673
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,12,2,128,1,float16,float16,0,2.8506399154663087
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,12,2,128,1,fp8,fp8,0,2.2267328262329102
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,12,4,128,1,float16,fp8,0,2.2432031631469727
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,12,4,128,1,float16,float16,0,2.8931583404541015
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,12,12,128,1,float16,fp8,0,1.2109264373779296
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,12,4,128,1,fp8,fp8,0,2.3042575836181642
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,12,12,128,1,float16,float16,0,1.7900400161743164
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,12,12,128,1,fp8,fp8,0,1.2298768043518067
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,12,1,128,1,float16,fp8,0,1.1731616020202638
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,12,1,128,1,fp8,fp8,0,1.1786720275878906
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,12,1,128,1,float16,float16,0,1.5179072380065919
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,12,2,128,1,float16,fp8,0,1.1729552268981933
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,12,2,128,1,float16,float16,0,1.4413776397705078
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,12,2,128,1,fp8,fp8,0,1.1800975799560547
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,12,4,128,1,float16,fp8,0,1.1770208358764649
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,12,4,128,1,float16,float16,0,1.4802351951599122
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,12,12,128,1,float16,float16,0,0.9542847633361816
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,12,12,128,1,float16,fp8,0,0.7358335971832275
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,12,4,128,1,fp8,fp8,0,1.173691177368164
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,12,12,128,1,fp8,fp8,0,0.6682864189147949
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,12,1,128,1,float16,fp8,0,0.6510543823242188
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,12,1,128,1,float16,float16,0,0.7696656227111817
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,12,1,128,1,fp8,fp8,0,0.6493199825286865
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,12,2,128,1,float16,float16,0,0.7702816009521485
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,12,2,128,1,float16,fp8,0,0.6575439929962158
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,12,2,128,1,fp8,fp8,0,0.6833680152893067
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,12,4,128,1,float16,float16,0,0.7926703929901123
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,12,4,128,1,float16,fp8,0,0.6487360000610352
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,12,4,128,1,fp8,fp8,0,0.650387191772461
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,12,1,128,1,float16,fp8,0,2.5403472900390627
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,12,1,128,1,float16,float16,0,3.0463247299194336
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,12,1,128,1,fp8,fp8,0,2.6295743942260743
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,12,2,128,1,float16,fp8,0,2.5484975814819335
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,12,2,128,1,fp8,fp8,0,2.5865104675292967
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,12,4,128,1,float16,fp8,0,2.5635007858276366
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,12,2,128,1,float16,float16,0,3.236248016357422
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,12,4,128,1,float16,float16,0,3.368881607055664
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,12,12,128,1,float16,fp8,0,1.376580810546875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,12,12,128,1,fp8,fp8,0,1.3781071662902833
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,12,1,128,1,float16,float16,0,1.520900821685791
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,12,1,128,1,float16,fp8,0,1.3279919624328613
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,12,4,128,1,fp8,fp8,0,2.559681510925293
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,12,1,128,1,fp8,fp8,0,1.5262720108032226
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,12,12,128,1,float16,float16,0,2.5987855911254885
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,12,2,128,1,float16,fp8,0,1.3233759880065918
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,12,2,128,1,float16,float16,0,1.8352048873901368
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,12,2,128,1,fp8,fp8,0,1.322652816772461
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,12,4,128,1,float16,fp8,0,1.3295616149902343
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,12,12,128,1,float16,fp8,0,0.8143312454223632
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,12,4,128,1,fp8,fp8,0,1.3544159889221192
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,12,12,128,1,float16,float16,0,1.1785632133483888
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,12,4,128,1,float16,float16,0,1.7749408721923827
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,12,12,128,1,fp8,fp8,0,0.7392047882080078
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,12,1,128,1,float16,fp8,0,0.710310411453247
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,12,1,128,1,float16,float16,0,0.9329327583312989
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,12,1,128,1,fp8,fp8,0,0.7106991767883301
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,12,2,128,1,float16,float16,0,0.8477760314941406
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,12,2,128,1,fp8,fp8,0,0.7141392230987549
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,12,2,128,1,float16,fp8,0,0.712343978881836
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,12,4,128,1,float16,fp8,0,0.7108943939208985
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,12,4,128,1,float16,float16,0,0.9318927764892578
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,12,4,128,1,fp8,fp8,0,0.7103248119354248
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,12,12,128,1,float16,fp8,0,0.41884641647338866
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,12,12,128,1,float16,float16,0,0.6469615936279297
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,12,12,128,1,fp8,fp8,0,0.4300288200378418
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,12,1,128,1,float16,float16,0,0.4472928047180176
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,12,1,128,1,float16,fp8,0,0.4032927989959717
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,12,1,128,1,fp8,fp8,0,0.4036880016326904
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,12,4,128,1,float16,float16,0,0.5030879974365234
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,12,2,128,1,float16,float16,0,0.4785935878753662
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,12,2,128,1,float16,fp8,0,0.4040559768676758
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,12,2,128,1,fp8,fp8,0,0.40296640396118166
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,12,4,128,1,float16,fp8,0,0.4457727909088135
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,12,4,128,1,fp8,fp8,0,0.40572800636291506
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,12,1,128,1,float16,fp8,0,1.830094337463379
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,12,1,128,1,float16,float16,0,2.1354463577270506
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,12,1,128,1,fp8,fp8,0,1.8324975967407227
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,12,2,128,1,float16,fp8,0,1.8347408294677734
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,12,2,128,1,float16,float16,0,2.2353919982910155
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,12,2,128,1,fp8,fp8,0,1.8349536895751952
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,12,4,128,1,float16,float16,0,2.4540624618530273
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,12,12,128,1,float16,fp8,0,1.0036319732666015
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,12,4,128,1,float16,fp8,0,1.8341007232666016
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,12,12,128,1,float16,float16,0,1.6460943222045898
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,12,12,128,1,fp8,fp8,0,1.1702639579772949
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,12,1,128,1,float16,float16,0,1.095531177520752
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,12,4,128,1,fp8,fp8,0,2.1952560424804686
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,12,1,128,1,float16,fp8,0,1.0538911819458008
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,12,1,128,1,fp8,fp8,0,0.955412769317627
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,12,2,128,1,float16,fp8,0,0.9594655990600586
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,12,2,128,1,float16,float16,0,1.1499728202819823
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,12,2,128,1,fp8,fp8,0,1.0326416015625
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,12,4,128,1,float16,fp8,0,1.0160431861877441
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,12,4,128,1,float16,float16,0,1.2888416290283202
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,12,12,128,1,float16,fp8,0,0.54475998878479
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,12,4,128,1,fp8,fp8,0,0.9972047805786133
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,12,12,128,1,fp8,fp8,0,0.5912543773651123
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,12,12,128,1,float16,float16,0,0.8611871719360351
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,12,1,128,1,float16,float16,0,0.5905519962310791
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,12,1,128,1,float16,fp8,0,0.5268032073974609
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,12,1,128,1,fp8,fp8,0,0.5883711814880371
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,12,2,128,1,float16,fp8,0,0.5197391986846924
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,12,2,128,1,float16,float16,0,0.6123744010925293
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,12,2,128,1,fp8,fp8,0,0.5313632011413574
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,12,4,128,1,float16,float16,0,0.673632001876831
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,12,4,128,1,float16,fp8,0,0.5221744060516358
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,12,4,128,1,fp8,fp8,0,0.5271679878234863
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,12,12,128,1,float16,float16,0,0.47501440048217775
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,12,12,128,1,float16,fp8,0,0.33322720527648925
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,12,2,128,1,float16,float16,0,0.33972160816192626
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,12,12,128,1,fp8,fp8,0,0.3161072015762329
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,12,2,128,1,fp8,fp8,0,0.3021791934967041
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,12,1,128,1,float16,float16,0,0.32837278842926027
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,12,1,128,1,float16,fp8,0,0.30600318908691404
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,12,1,128,1,fp8,fp8,0,0.3060895919799805
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,12,2,128,1,float16,fp8,0,0.30223679542541504
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,12,4,128,1,float16,float16,0,0.3768255949020386
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,12,4,128,1,float16,fp8,0,0.30431199073791504
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,12,4,128,1,fp8,fp8,0,0.3029360055923462
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,12,1,128,1,float16,fp8,0,2.3883119583129884
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,12,1,128,1,fp8,fp8,0,2.388515281677246
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,12,1,128,1,float16,float16,0,2.9332000732421877
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,12,2,128,1,float16,fp8,0,2.3913488388061523
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,12,2,128,1,fp8,fp8,0,2.3917472839355467
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,12,2,128,1,float16,float16,0,3.0164768218994142
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,12,4,128,1,float16,fp8,0,2.390603256225586
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,12,4,128,1,float16,float16,0,3.31175537109375
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,12,12,128,1,fp8,fp8,0,1.3154640197753906
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,12,12,128,1,float16,fp8,0,1.6049615859985351
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,12,1,128,1,float16,float16,0,1.4238800048828124
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,12,1,128,1,fp8,fp8,0,1.2291343688964844
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,12,4,128,1,fp8,fp8,0,2.396446418762207
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,12,1,128,1,float16,fp8,0,1.4939359664916991
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,12,12,128,1,float16,float16,0,2.2966480255126953
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,12,2,128,1,float16,float16,0,1.510257625579834
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,12,2,128,1,float16,fp8,0,1.2353952407836915
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,12,2,128,1,fp8,fp8,0,1.458233642578125
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,12,4,128,1,float16,fp8,0,1.2297391891479492
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,12,12,128,1,float16,fp8,0,0.6876319885253906
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,12,4,128,1,fp8,fp8,0,1.2587008476257324
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,12,12,128,1,float16,float16,0,1.2210000038146973
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,12,4,128,1,float16,float16,0,1.800961685180664
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,12,12,128,1,fp8,fp8,0,0.793614387512207
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,12,1,128,1,float16,float16,0,0.7400591850280762
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,12,1,128,1,float16,fp8,0,0.6546256065368652
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,12,1,128,1,fp8,fp8,0,0.6500976085662842
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,12,2,128,1,float16,float16,0,0.7786287784576416
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,12,2,128,1,float16,fp8,0,0.6515359878540039
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,12,2,128,1,fp8,fp8,0,0.6586112022399903
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,12,4,128,1,float16,fp8,0,0.7090528011322021
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,12,4,128,1,float16,float16,0,0.9338432312011719
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,12,4,128,1,fp8,fp8,0,0.6508528232574463
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,12,12,128,1,float16,fp8,0,0.3799263954162598
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,12,12,128,1,float16,float16,0,0.6248144149780274
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,12,12,128,1,fp8,fp8,0,0.38115520477294923
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,12,1,128,1,float16,float16,0,0.40293278694152834
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,12,1,128,1,float16,fp8,0,0.35953919887542723
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,12,1,128,1,fp8,fp8,0,0.38517920970916747
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,12,4,128,1,float16,float16,0,0.4643375873565674
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,12,2,128,1,float16,float16,0,0.4404736042022705
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,12,2,128,1,float16,fp8,0,0.3589871883392334
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,12,2,128,1,fp8,fp8,0,0.3593120098114014
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,12,4,128,1,float16,fp8,0,0.3621328115463257
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,12,4,128,1,fp8,fp8,0,0.35965280532836913
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,12,12,128,1,float16,float16,0,0.3484463930130005
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,12,12,128,1,float16,fp8,0,0.2244431972503662
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,12,12,128,1,fp8,fp8,0,0.2214672088623047
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,12,1,128,1,float16,float16,0,0.2331167936325073
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,12,1,128,1,float16,fp8,0,0.21203041076660156
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,12,4,128,1,float16,float16,0,0.2604703903198242
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,12,4,128,1,float16,fp8,0,0.21239039897918702
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,12,1,128,1,fp8,fp8,0,0.21275200843811035
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,12,2,128,1,float16,float16,0,0.24276480674743653
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,12,2,128,1,float16,fp8,0,0.2118527889251709
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,12,2,128,1,fp8,fp8,0,0.2135711908340454
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,12,4,128,1,fp8,fp8,0,0.21256799697875978
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,12,1,128,1,float16,float16,0,1.65423526763916
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,12,1,128,1,float16,fp8,0,1.4402400016784669
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,12,1,128,1,fp8,fp8,0,1.439793586730957
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,12,2,128,1,float16,fp8,0,1.4395456314086914
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,12,2,128,1,float16,float16,0,1.7726463317871093
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,12,2,128,1,fp8,fp8,0,1.441808032989502
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,12,4,128,1,float16,float16,0,2.0226512908935548
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,12,12,128,1,float16,fp8,0,0.8005840301513671
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,12,4,128,1,fp8,fp8,0,1.4410016059875488
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,12,12,128,1,fp8,fp8,0,0.8934944152832032
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,12,1,128,1,float16,float16,0,0.8483391761779785
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,12,4,128,1,float16,fp8,0,1.6714656829833985
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,12,12,128,1,float16,float16,0,1.506777572631836
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,12,1,128,1,float16,fp8,0,0.8021984100341797
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,12,1,128,1,fp8,fp8,0,0.747654390335083
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,12,2,128,1,float16,float16,0,0.9145392417907715
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,12,2,128,1,float16,fp8,0,0.853923225402832
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,12,2,128,1,fp8,fp8,0,0.7893856048583985
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,12,4,128,1,float16,fp8,0,0.7484255790710449
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,12,4,128,1,float16,float16,0,1.0353455543518066
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,12,4,128,1,fp8,fp8,0,0.7627615928649902
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,12,12,128,1,float16,fp8,0,0.46822400093078614
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,12,12,128,1,float16,float16,0,0.7823200225830078
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,12,12,128,1,fp8,fp8,0,0.44318242073059083
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,12,1,128,1,float16,float16,0,0.4551152229309082
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,12,1,128,1,float16,fp8,0,0.40201120376586913
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,12,1,128,1,fp8,fp8,0,0.40370879173278806
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,12,2,128,1,float16,float16,0,0.4855184078216553
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,12,2,128,1,float16,fp8,0,0.4183951854705811
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,12,2,128,1,fp8,fp8,0,0.4015984058380127
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,12,4,128,1,float16,float16,0,0.5392975807189941
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,12,4,128,1,float16,fp8,0,0.4103519916534424
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,12,4,128,1,fp8,fp8,0,0.40480480194091795
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,12,12,128,1,float16,fp8,0,0.24417440891265868
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,12,12,128,1,float16,float16,0,0.42253599166870115
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,12,12,128,1,fp8,fp8,0,0.24481120109558105
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,12,1,128,1,float16,float16,0,0.25622079372406004
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,12,1,128,1,float16,fp8,0,0.22803840637207032
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,12,1,128,1,fp8,fp8,0,0.22822880744934082
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,12,2,128,1,float16,float16,0,0.27177278995513915
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,12,2,128,1,float16,fp8,0,0.22830240726470946
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,12,2,128,1,fp8,fp8,0,0.22750399112701417
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,12,12,128,1,fp8,fp8,0,0.15216000080108644
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,12,4,128,1,float16,float16,0,0.30182719230651855
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,12,4,128,1,float16,fp8,0,0.22810719013214112
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,12,1,128,1,fp8,fp8,0,0.1443295955657959
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,12,4,128,1,fp8,fp8,0,0.229584002494812
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,12,12,128,1,float16,float16,0,0.2448575973510742
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,12,2,128,1,fp8,fp8,0,0.14526720046997071
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,12,12,128,1,float16,fp8,0,0.15096960067749024
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,12,1,128,1,float16,float16,0,0.1611296057701111
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,12,4,128,1,fp8,fp8,0,0.14444320201873778
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,12,1,128,1,float16,fp8,0,0.14422080516815186
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,12,2,128,1,float16,float16,0,0.1656208038330078
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,12,2,128,1,float16,fp8,0,0.1435520052909851
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,12,4,128,1,float16,float16,0,0.17483359575271606
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,12,4,128,1,float16,fp8,0,0.1442095994949341
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,12,1,128,1,float16,fp8,0,1.4081151962280274
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,12,1,128,1,float16,float16,0,1.5938976287841797
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,12,1,128,1,fp8,fp8,0,1.4084383964538574
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,12,2,128,1,float16,fp8,0,1.4079808235168456
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,12,2,128,1,float16,float16,0,1.7621776580810546
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,12,2,128,1,fp8,fp8,0,1.4050576210021972
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,12,4,128,1,float16,fp8,0,1.4066080093383788
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,12,4,128,1,float16,float16,0,2.0917327880859373
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,12,12,128,1,float16,fp8,0,0.7949888229370117
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,12,12,128,1,fp8,fp8,0,0.7935296058654785
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,12,1,128,1,float16,float16,0,0.8626848220825195
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,12,1,128,1,float16,fp8,0,0.7229472160339355
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,12,4,128,1,fp8,fp8,0,1.4082143783569336
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,12,1,128,1,fp8,fp8,0,0.7233248233795166
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,12,12,128,1,float16,float16,0,1.7436784744262694
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,12,2,128,1,float16,fp8,0,0.7241712093353272
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,12,2,128,1,fp8,fp8,0,0.7232160091400146
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,12,2,128,1,float16,float16,0,0.915129566192627
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,12,4,128,1,float16,fp8,0,0.7232016086578369
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,12,4,128,1,float16,float16,0,1.0832207679748536
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,12,12,128,1,float16,fp8,0,0.42259998321533204
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,12,4,128,1,fp8,fp8,0,0.7250016212463379
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,12,12,128,1,fp8,fp8,0,0.4200255870819092
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,12,12,128,1,float16,float16,0,0.869876766204834
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,12,1,128,1,float16,float16,0,0.42778878211975097
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,12,2,128,1,fp8,fp8,0,0.3824352025985718
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,12,1,128,1,float16,fp8,0,0.39428799152374266
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,12,1,128,1,fp8,fp8,0,0.3821471929550171
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,12,2,128,1,float16,float16,0,0.49524641036987305
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,12,2,128,1,float16,fp8,0,0.38310880661010743
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,12,4,128,1,float16,fp8,0,0.38258559703826905
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,12,4,128,1,float16,float16,0,0.5510303974151611
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,12,4,128,1,fp8,fp8,0,0.38266720771789553
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,12,12,128,1,float16,fp8,0,0.23194398880004882
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,12,12,128,1,float16,float16,0,0.4587456226348877
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,12,12,128,1,fp8,fp8,0,0.23119521141052246
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,12,1,128,1,float16,float16,0,0.23907198905944824
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,12,1,128,1,float16,fp8,0,0.21107521057128906
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,12,1,128,1,fp8,fp8,0,0.21047520637512207
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,12,2,128,1,float16,float16,0,0.2618079900741577
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,12,2,128,1,float16,fp8,0,0.21150081157684325
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,12,2,128,1,fp8,fp8,0,0.2108112096786499
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,12,4,128,1,float16,float16,0,0.2995151996612549
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,12,12,128,1,fp8,fp8,0,0.1353600025177002
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,12,4,128,1,float16,fp8,0,0.21202559471130372
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,12,4,128,1,fp8,fp8,0,0.21214079856872559
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,12,12,128,1,float16,float16,0,0.25452160835266113
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,12,12,128,1,float16,fp8,0,0.13558559417724608
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,12,1,128,1,float16,float16,0,0.13990240097045897
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,12,1,128,1,float16,fp8,0,0.1261904001235962
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,12,4,128,1,float16,float16,0,0.16558560132980346
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,12,1,128,1,fp8,fp8,0,0.1260591983795166
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,12,2,128,1,float16,float16,0,0.14755359888076783
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,12,2,128,1,float16,fp8,0,0.12616319656372071
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,12,2,128,1,fp8,fp8,0,0.1256160020828247
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,12,4,128,1,float16,fp8,0,0.12567199468612672
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,12,4,128,1,fp8,fp8,0,0.12584320306777955
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,12,12,128,1,float16,float16,0,0.13492480516433716
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,12,12,128,1,float16,fp8,0,0.08689119815826415
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,12,2,128,1,float16,float16,0,0.09784799814224243
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,12,12,128,1,fp8,fp8,0,0.08652639985084534
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,12,1,128,1,float16,float16,0,0.09323359727859497
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,12,1,128,1,float16,fp8,0,0.08230720162391662
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,12,1,128,1,fp8,fp8,0,0.08249760270118714
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,12,2,128,1,float16,fp8,0,0.0824288010597229
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,12,2,128,1,fp8,fp8,0,0.08238400220870971
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,12,4,128,1,float16,float16,0,0.10512959957122803
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,12,4,128,1,float16,fp8,0,0.08223999738693237
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,12,4,128,1,fp8,fp8,0,0.08245440125465393
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,12,1,128,1,float16,float16,0,0.9788432121276855
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,12,1,128,1,float16,fp8,0,0.8776944160461426
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,12,1,128,1,fp8,fp8,0,0.8776896476745606
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,12,2,128,1,float16,fp8,0,0.8774080276489258
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,12,2,128,1,float16,float16,0,1.1046624183654785
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,12,2,128,1,fp8,fp8,0,0.8772319793701172
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,12,12,128,1,float16,fp8,0,0.5101263999938965
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,12,4,128,1,float16,fp8,0,0.8762175559997558
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,12,4,128,1,fp8,fp8,0,0.9128239631652832
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,12,12,128,1,fp8,fp8,0,0.5090720176696777
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,12,1,128,1,float16,float16,0,0.5149343967437744
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,12,4,128,1,float16,float16,0,1.348961639404297
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,12,1,128,1,float16,fp8,0,0.4555408000946045
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,12,2,128,1,float16,fp8,0,0.4720208168029785
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,12,12,128,1,float16,float16,0,1.1700016021728517
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,12,1,128,1,fp8,fp8,0,0.45563359260559083
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,12,2,128,1,float16,float16,0,0.5682191848754883
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,12,4,128,1,fp8,fp8,0,0.4559391975402832
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,12,2,128,1,fp8,fp8,0,0.4550335884094238
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,12,4,128,1,float16,fp8,0,0.4553408145904541
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,12,4,128,1,float16,float16,0,0.6983727931976318
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,12,12,128,1,float16,fp8,0,0.27158560752868655
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,12,12,128,1,float16,float16,0,0.6149040222167969
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,12,12,128,1,fp8,fp8,0,0.27186079025268556
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,12,2,128,1,fp8,fp8,0,0.2446768045425415
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,12,1,128,1,float16,float16,0,0.2762176036834717
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,12,1,128,1,float16,fp8,0,0.24446239471435546
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,12,1,128,1,fp8,fp8,0,0.24487841129302979
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,12,2,128,1,float16,float16,0,0.3048448085784912
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,12,2,128,1,float16,fp8,0,0.2465264081954956
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,12,4,128,1,float16,float16,0,0.36454880237579346
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,12,4,128,1,float16,fp8,0,0.24503519535064697
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,12,4,128,1,fp8,fp8,0,0.244867205619812
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,12,12,128,1,float16,fp8,0,0.15400320291519165
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,12,12,128,1,float16,float16,0,0.32361440658569335
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,12,12,128,1,fp8,fp8,0,0.15400320291519165
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,12,1,128,1,float16,float16,0,0.15605920553207397
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,12,1,128,1,float16,fp8,0,0.14019360542297363
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,12,1,128,1,fp8,fp8,0,0.1387712001800537
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,12,2,128,1,float16,float16,0,0.17255200147628785
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,12,2,128,1,float16,fp8,0,0.13853759765625
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,12,2,128,1,fp8,fp8,0,0.13794560432434083
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,12,4,128,1,float16,float16,0,0.20551838874816894
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,12,4,128,1,float16,fp8,0,0.13902560472488404
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,12,4,128,1,fp8,fp8,0,0.13815840482711791
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,12,12,128,1,float16,float16,0,0.1850767970085144
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,12,12,128,1,float16,fp8,0,0.09286400079727172
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,12,12,128,1,fp8,fp8,0,0.09323840141296387
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,12,1,128,1,float16,float16,0,0.10039039850234985
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,12,4,128,1,float16,float16,0,0.11523200273513794
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,12,1,128,1,float16,fp8,0,0.0864736020565033
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,12,1,128,1,fp8,fp8,0,0.08647840023040772
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,12,2,128,1,float16,float16,0,0.10591520071029663
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,12,2,128,1,float16,fp8,0,0.08663520216941833
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,12,2,128,1,fp8,fp8,0,0.0871936023235321
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,12,4,128,1,float16,fp8,0,0.08699679970741273
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,12,4,128,1,fp8,fp8,0,0.08645280003547669
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,12,12,128,1,float16,float16,0,0.09187520146369935
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,12,12,128,1,float16,fp8,0,0.055307197570800784
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,12,12,128,1,fp8,fp8,0,0.05555199980735779
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,12,1,128,1,float16,float16,0,0.06313440203666687
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,12,1,128,1,float16,fp8,0,0.051528000831604005
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,12,1,128,1,fp8,fp8,0,0.0520799994468689
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,12,2,128,1,float16,float16,0,0.06705120205879211
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,12,2,128,1,float16,fp8,0,0.05182240009307861
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,12,2,128,1,fp8,fp8,0,0.05166879892349243
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,12,4,128,1,float16,float16,0,0.07150880098342896
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,12,4,128,1,float16,fp8,0,0.0515392005443573
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,12,4,128,1,fp8,fp8,0,0.051497602462768556
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,12,1,128,1,float16,float16,0,1.0089967727661133
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,12,1,128,1,float16,fp8,0,0.9086943626403808
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,12,1,128,1,fp8,fp8,0,0.9086144447326661
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,12,2,128,1,float16,fp8,0,0.9079456329345703
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,12,2,128,1,fp8,fp8,0,0.9071184158325195
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,12,2,128,1,float16,float16,0,1.1690256118774414
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,12,4,128,1,float16,fp8,0,0.9068544387817383
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,12,4,128,1,fp8,fp8,0,0.9080575942993164
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,12,12,128,1,float16,fp8,0,0.537113618850708
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,12,12,128,1,fp8,fp8,0,0.5372015953063964
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,12,1,128,1,float16,float16,0,0.5167359828948974
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,12,4,128,1,float16,float16,0,1.4924495697021485
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,12,12,128,1,float16,float16,0,1.4011648178100586
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,12,1,128,1,float16,fp8,0,0.4652080059051514
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,12,1,128,1,fp8,fp8,0,0.46721758842468264
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,12,2,128,1,float16,fp8,0,0.46718559265136717
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,12,2,128,1,float16,float16,0,0.5974480152130127
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,12,2,128,1,fp8,fp8,0,0.4662576198577881
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,12,4,128,1,fp8,fp8,0,0.46599998474121096
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,12,4,128,1,float16,fp8,0,0.46731200218200686
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,12,4,128,1,float16,float16,0,0.7617743968963623
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,12,12,128,1,float16,fp8,0,0.28218879699707033
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,12,12,128,1,fp8,fp8,0,0.28232479095458984
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,12,12,128,1,float16,float16,0,0.7176144123077393
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,12,1,128,1,float16,float16,0,0.2765615940093994
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,12,1,128,1,float16,fp8,0,0.24632480144500732
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,12,1,128,1,fp8,fp8,0,0.24619519710540771
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,12,2,128,1,float16,float16,0,0.3161007881164551
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,12,2,128,1,float16,fp8,0,0.24683361053466796
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,12,2,128,1,fp8,fp8,0,0.24694080352783204
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,12,4,128,1,float16,fp8,0,0.24676160812377929
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,12,4,128,1,float16,float16,0,0.39913918972015383
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,12,4,128,1,fp8,fp8,0,0.2462480068206787
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,12,12,128,1,float16,fp8,0,0.15524319410324097
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,12,12,128,1,float16,float16,0,0.3756432056427002
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,12,12,128,1,fp8,fp8,0,0.1549407958984375
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,12,1,128,1,float16,float16,0,0.15852479934692382
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,12,1,128,1,float16,fp8,0,0.13630880117416383
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,12,1,128,1,fp8,fp8,0,0.13725119829177856
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,12,2,128,1,float16,float16,0,0.17876640558242798
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,12,2,128,1,float16,fp8,0,0.13615520000457765
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,12,2,128,1,fp8,fp8,0,0.13602720499038695
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,12,12,128,1,float16,float16,0,0.20684480667114258
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,12,12,128,1,float16,fp8,0,0.0893567979335785
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,12,4,128,1,float16,float16,0,0.21821439266204834
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,12,4,128,1,float16,fp8,0,0.13684799671173095
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,12,4,128,1,fp8,fp8,0,0.13683520555496215
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,12,12,128,1,fp8,fp8,0,0.09013919830322266
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,12,1,128,1,float16,float16,0,0.09335200190544128
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,12,1,128,1,float16,fp8,0,0.08087679743766785
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,12,1,128,1,fp8,fp8,0,0.0807263970375061
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,12,4,128,1,fp8,fp8,0,0.08137120008468628
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,12,2,128,1,float16,float16,0,0.09936320185661315
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,12,2,128,1,float16,fp8,0,0.08031520247459412
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,12,2,128,1,fp8,fp8,0,0.08115839958190918
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,12,4,128,1,float16,fp8,0,0.08082079887390137
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,12,4,128,1,float16,float16,0,0.11707040071487426
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,12,12,128,1,float16,float16,0,0.10663039684295654
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,12,12,128,1,float16,fp8,0,0.05771679878234863
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,12,12,128,1,fp8,fp8,0,0.0576304018497467
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,12,1,128,1,float16,float16,0,0.06373760104179382
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,12,1,128,1,float16,fp8,0,0.05305759906768799
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,12,1,128,1,fp8,fp8,0,0.05355839729309082
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,12,2,128,1,float16,float16,0,0.06821600198745728
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,12,2,128,1,float16,fp8,0,0.05370079874992371
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,12,2,128,1,fp8,fp8,0,0.05374720096588135
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,12,4,128,1,float16,float16,0,0.07447360157966613
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,12,4,128,1,float16,fp8,0,0.0534991979598999
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,12,4,128,1,fp8,fp8,0,0.05376960039138794
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,12,12,128,1,float16,float16,0,0.06600800156593323
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,12,12,128,1,float16,fp8,0,0.04107840061187744
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,12,12,128,1,fp8,fp8,0,0.04031839966773987
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,12,1,128,1,float16,float16,0,0.0488864004611969
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,12,1,128,1,float16,fp8,0,0.03909119963645935
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,12,1,128,1,fp8,fp8,0,0.037904000282287596
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,12,2,128,1,float16,float16,0,0.049326398968696596
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,12,2,128,1,float16,fp8,0,0.03792960047721863
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,12,2,128,1,fp8,fp8,0,0.0386352002620697
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,12,4,128,1,float16,float16,0,0.05348479747772217
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,12,4,128,1,float16,fp8,0,0.03760479986667633
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,12,1,128,1,fp8,fp8,0,0.5921023845672607
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,12,4,128,1,fp8,fp8,0,0.03886080086231232
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,12,2,128,1,float16,float16,0,0.7744400024414062
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,12,1,128,1,float16,float16,0,0.654207992553711
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,12,1,128,1,float16,fp8,0,0.5907631874084472
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,12,2,128,1,float16,fp8,0,0.5917920112609864
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,12,2,128,1,fp8,fp8,0,0.591542387008667
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,12,4,128,1,float16,fp8,0,0.5921055793762207
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,12,4,128,1,fp8,fp8,0,0.5913440227508545
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,12,4,128,1,float16,float16,0,1.0179743766784668
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,12,12,128,1,float16,fp8,0,0.35964159965515136
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,12,12,128,1,fp8,fp8,0,0.3589423894882202
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,12,1,128,1,float16,float16,0,0.341923189163208
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,12,12,128,1,float16,float16,0,1.0002639770507813
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,12,1,128,1,float16,fp8,0,0.3067008018493652
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,12,1,128,1,fp8,fp8,0,0.3075279951095581
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,12,4,128,1,float16,float16,0,0.5234367847442627
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,12,2,128,1,float16,float16,0,0.40273118019104004
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,12,2,128,1,float16,fp8,0,0.307041597366333
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,12,2,128,1,fp8,fp8,0,0.307694411277771
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,12,4,128,1,float16,fp8,0,0.306878399848938
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,12,4,128,1,fp8,fp8,0,0.3067280054092407
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,12,12,128,1,float16,fp8,0,0.19224319458007813
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,12,12,128,1,fp8,fp8,0,0.19175200462341307
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,12,12,128,1,float16,float16,0,0.5156176090240479
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,12,1,128,1,float16,float16,0,0.19138879776000978
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,12,1,128,1,float16,fp8,0,0.16569759845733642
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,12,1,128,1,fp8,fp8,0,0.16446880102157593
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,12,2,128,1,float16,fp8,0,0.16518399715423585
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,12,2,128,1,float16,float16,0,0.21984319686889647
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,12,2,128,1,fp8,fp8,0,0.16493920087814332
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,12,4,128,1,float16,float16,0,0.2786272048950195
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,12,4,128,1,float16,fp8,0,0.1650720000267029
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,12,4,128,1,fp8,fp8,0,0.16673280000686647
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,12,12,128,1,float16,fp8,0,0.1084831953048706
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,12,12,128,1,float16,float16,0,0.27438879013061523
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,12,12,128,1,fp8,fp8,0,0.10772160291671753
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,12,1,128,1,float16,float16,0,0.10715839862823487
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,12,2,128,1,fp8,fp8,0,0.09265599846839905
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,12,1,128,1,float16,fp8,0,0.09288960099220275
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,12,1,128,1,fp8,fp8,0,0.22214241027832032
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,12,2,128,1,float16,float16,0,0.12775360345840453
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,12,2,128,1,float16,fp8,0,0.09314079880714417
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,12,4,128,1,float16,float16,0,0.15747679471969606
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,12,4,128,1,float16,fp8,0,0.09348000288009643
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,12,4,128,1,fp8,fp8,0,0.09271039962768554
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,12,12,128,1,float16,float16,0,0.15325599908828735
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,12,12,128,1,float16,fp8,0,0.06395999789237976
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,12,12,128,1,fp8,fp8,0,0.06399679780006409
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,12,1,128,1,float16,float16,0,0.0690768003463745
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,12,1,128,1,float16,fp8,0,0.05678880214691162
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,12,1,128,1,fp8,fp8,0,0.05696319937705994
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,12,2,128,1,float16,float16,0,0.0740559995174408
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,12,2,128,1,float16,fp8,0,0.05674880146980286
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,12,2,128,1,fp8,fp8,0,0.057708799839019775
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,12,4,128,1,float16,float16,0,0.08493760228157043
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,12,4,128,1,float16,fp8,0,0.05722399950027466
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,12,4,128,1,fp8,fp8,0,0.05697759985923767
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,12,12,128,1,float16,float16,0,0.07290080189704895
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,12,12,128,1,float16,fp8,0,0.037212800979614255
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,12,1,128,1,float16,float16,0,0.04486719965934753
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,12,12,128,1,fp8,fp8,0,0.03709439933300018
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,12,1,128,1,float16,fp8,0,0.035076799988746646
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,12,1,128,1,fp8,fp8,0,0.034974399209022525
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,12,2,128,1,float16,float16,0,0.04789279997348785
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,12,2,128,1,float16,fp8,0,0.03495840132236481
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,12,2,128,1,fp8,fp8,0,0.03506399989128113
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,12,4,128,1,float16,float16,0,0.05318560004234314
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,12,4,128,1,float16,fp8,0,0.03516159951686859
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,12,4,128,1,fp8,fp8,0,0.03500959873199463
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,12,12,128,1,float16,float16,0,0.05402399897575379
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,12,12,128,1,float16,fp8,0,0.03306399881839752
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,12,12,128,1,fp8,fp8,0,0.03313280045986176
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,12,1,128,1,float16,float16,0,0.04162240028381348
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,12,1,128,1,float16,fp8,0,0.03094240128993988
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,12,1,128,1,fp8,fp8,0,0.03094879984855652
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,12,2,128,1,float16,float16,0,0.043024000525474546
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,12,2,128,1,float16,fp8,0,0.030964800715446474
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,12,2,128,1,fp8,fp8,0,0.03097760081291199
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,12,4,128,1,float16,float16,0,0.045238399505615236
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,12,4,128,1,float16,fp8,0,0.031033599376678468
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,12,4,128,1,fp8,fp8,0,0.030990400910377504
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,12,1,128,1,float16,float16,0,0.7191391944885254
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,12,1,128,1,float16,fp8,0,0.65589919090271
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,12,1,128,1,fp8,fp8,0,0.6557536125183105
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,12,2,128,1,float16,fp8,0,0.6560495853424072
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,12,2,128,1,float16,float16,0,0.8819199562072754
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,12,2,128,1,fp8,fp8,0,0.6562431812286377
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,12,4,128,1,float16,fp8,0,0.6561456203460694
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,12,4,128,1,fp8,fp8,0,0.6544320106506347
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,12,12,128,1,float16,fp8,0,0.4078383922576904
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,12,12,128,1,fp8,fp8,0,0.4081376075744629
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,12,4,128,1,float16,float16,0,1.2093600273132323
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,12,1,128,1,float16,float16,0,0.376145601272583
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,12,1,128,1,float16,fp8,0,0.338374400138855
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,12,12,128,1,float16,float16,0,1.2508031845092773
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,12,1,128,1,fp8,fp8,0,0.3369519948959351
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,12,2,128,1,float16,float16,0,0.45643200874328616
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,12,2,128,1,float16,fp8,0,0.33661439418792727
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,12,2,128,1,fp8,fp8,0,0.3368736028671265
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,12,4,128,1,float16,fp8,0,0.33712000846862794
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,12,4,128,1,float16,float16,0,0.6179952144622802
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,12,4,128,1,fp8,fp8,0,0.33817119598388673
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,12,12,128,1,float16,fp8,0,0.2132352113723755
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,12,12,128,1,fp8,fp8,0,0.21320159435272218
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,12,12,128,1,float16,float16,0,0.6397439956665039
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,12,1,128,1,float16,float16,0,0.20547199249267578
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,12,1,128,1,float16,fp8,0,0.17822400331497193
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,12,1,128,1,fp8,fp8,0,0.17789599895477295
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,12,2,128,1,float16,float16,0,0.2449280023574829
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,12,2,128,1,float16,fp8,0,0.17922240495681763
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,12,2,128,1,fp8,fp8,0,0.17825599908828735
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,12,4,128,1,float16,float16,0,0.32366080284118653
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,12,4,128,1,float16,fp8,0,0.17858879566192626
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,12,4,128,1,fp8,fp8,0,0.17833759784698486
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,12,12,128,1,float16,fp8,0,0.1171455979347229
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,12,2,128,1,float16,float16,0,0.13953919410705568
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,12,12,128,1,float16,float16,0,0.3341360092163086
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,12,12,128,1,fp8,fp8,0,0.1167631983757019
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,12,1,128,1,float16,float16,0,0.1209712028503418
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,12,1,128,1,float16,fp8,0,0.0973855972290039
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,12,1,128,1,fp8,fp8,0,0.09747679829597473
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,12,2,128,1,float16,fp8,0,0.09752640128135681
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,12,2,128,1,fp8,fp8,0,0.09724000096321106
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,12,4,128,1,float16,float16,0,0.1779296040534973
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,12,4,128,1,float16,fp8,0,0.09913439750671386
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,12,4,128,1,fp8,fp8,0,0.09936959743499756
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,12,12,128,1,float16,float16,0,0.18167359828948976
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,12,12,128,1,float16,fp8,0,0.06734240055084229
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,12,12,128,1,fp8,fp8,0,0.06628159880638122
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,12,2,128,1,float16,fp8,0,0.05824480056762695
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,12,1,128,1,float16,float16,0,0.06872320175170898
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,12,1,128,1,float16,fp8,0,0.05800319910049438
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,12,1,128,1,fp8,fp8,0,0.057796800136566163
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,12,2,128,1,float16,float16,0,0.07519360184669495
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,12,2,128,1,fp8,fp8,0,0.05803040266036987
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,12,4,128,1,float16,float16,0,0.09503999948501587
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,12,4,128,1,float16,fp8,0,0.05823519825935364
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,12,4,128,1,fp8,fp8,0,0.05820959806442261
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,12,12,128,1,float16,float16,0,0.09007840156555176
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,12,12,128,1,float16,fp8,0,0.04304159879684448
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,12,12,128,1,fp8,fp8,0,0.04296480119228363
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,12,1,128,1,float16,float16,0,0.048390400409698484
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,12,1,128,1,float16,fp8,0,0.03844479918479919
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,12,1,128,1,fp8,fp8,0,0.0387584000825882
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,12,2,128,1,float16,float16,0,0.053232002258300784
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,12,2,128,1,float16,fp8,0,0.03813279867172241
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,12,2,128,1,fp8,fp8,0,0.03844479918479919
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,12,4,128,1,float16,float16,0,0.058924800157546996
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,12,4,128,1,float16,fp8,0,0.03903680145740509
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,12,4,128,1,fp8,fp8,0,0.03871200084686279
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,12,12,128,1,float16,float16,0,0.05500159859657287
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,12,12,128,1,float16,fp8,0,0.02882080078125
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,12,12,128,1,fp8,fp8,0,0.02884480059146881
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,12,1,128,1,float16,float16,0,0.03557280004024506
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,12,1,128,1,float16,fp8,0,0.026812800765037538
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,12,1,128,1,fp8,fp8,0,0.026867198944091796
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,12,2,128,1,float16,float16,0,0.03706879913806915
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,12,2,128,1,float16,fp8,0,0.026820799708366393
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,12,2,128,1,fp8,fp8,0,0.02674719989299774
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,12,4,128,1,float16,float16,0,0.04160000085830688
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,12,4,128,1,float16,fp8,0,0.026769599318504332
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,12,4,128,1,fp8,fp8,0,0.026833599805831908
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,12,12,128,1,float16,float16,0,0.04334560036659241
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,12,12,128,1,float16,fp8,0,0.025551998615264894
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,12,2,128,1,fp8,fp8,0,0.024908800423145295
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,12,12,128,1,fp8,fp8,0,0.02696320116519928
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,12,4,128,1,float16,float16,0,0.03511039912700653
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,12,1,128,1,float16,float16,0,0.03512159883975983
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,12,4,128,1,fp8,fp8,0,0.02492160052061081
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,12,1,128,1,float16,fp8,0,0.024886399507522583
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,12,1,128,1,fp8,fp8,0,0.024732799828052522
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,12,2,128,1,float16,float16,0,0.0350959986448288
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,12,2,128,1,float16,fp8,0,0.024732799828052522
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,12,1,128,1,float16,fp8,0,0.5284255981445313
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,12,4,128,1,float16,fp8,0,0.0247311994433403
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,12,1,128,1,float16,float16,0,0.5897071838378907
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,12,1,128,1,fp8,fp8,0,0.5279888153076172
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,12,2,128,1,float16,fp8,0,0.5272719860076904
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,12,2,128,1,float16,float16,0,0.7497920036315918
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,12,2,128,1,fp8,fp8,0,0.5268640041351318
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,12,4,128,1,float16,fp8,0,0.5274943828582763
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,12,4,128,1,fp8,fp8,0,0.5272047996520997
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,12,12,128,1,float16,fp8,0,0.3403872013092041
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,12,12,128,1,fp8,fp8,0,0.34014880657196045
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,12,1,128,1,fp8,fp8,0,0.27155361175537107
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,12,4,128,1,float16,float16,0,1.0719103813171387
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,12,1,128,1,float16,float16,0,0.3089744091033936
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,12,1,128,1,float16,fp8,0,0.2711440086364746
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,12,12,128,1,float16,float16,0,1.1756383895874023
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,12,4,128,1,float16,fp8,0,0.2708271980285645
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,12,4,128,1,fp8,fp8,0,0.2710848093032837
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,12,2,128,1,float16,float16,0,0.38988161087036133
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,12,2,128,1,float16,fp8,0,0.27128798961639405
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,12,2,128,1,fp8,fp8,0,0.27154080867767333
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,12,4,128,1,float16,float16,0,0.5473536014556885
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,12,12,128,1,float16,fp8,0,0.17776960134506226
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,12,1,128,1,float16,float16,0,0.17111040353775026
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,12,12,128,1,fp8,fp8,0,0.17826559543609619
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,12,12,128,1,float16,float16,0,0.6001887798309327
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,12,1,128,1,float16,fp8,0,0.14468319416046144
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,12,1,128,1,fp8,fp8,0,0.14402400255203246
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,12,2,128,1,float16,float16,0,0.20898399353027344
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,12,2,128,1,float16,fp8,0,0.1439120054244995
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,12,2,128,1,fp8,fp8,0,0.14376000165939332
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,12,12,128,1,fp8,fp8,0,0.09816640019416809
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,12,4,128,1,float16,float16,0,0.2861648082733154
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,12,4,128,1,float16,fp8,0,0.14425760507583618
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,12,4,128,1,fp8,fp8,0,0.14408160448074342
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,12,12,128,1,float16,fp8,0,0.09787200093269348
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,12,12,128,1,float16,float16,0,0.3130255937576294
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,12,1,128,1,float16,float16,0,0.09950559735298156
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,12,1,128,1,float16,fp8,0,0.07833920121192932
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,12,1,128,1,fp8,fp8,0,0.07825440168380737
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,12,2,128,1,float16,float16,0,0.11958400011062623
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,12,12,128,1,float16,float16,0,0.16926720142364501
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,12,2,128,1,float16,fp8,0,0.07902399897575378
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,12,2,128,1,fp8,fp8,0,0.07921280264854431
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,12,4,128,1,float16,float16,0,0.15767199993133546
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,12,4,128,1,float16,fp8,0,0.07996479868888855
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,12,4,128,1,fp8,fp8,0,0.08005279898643494
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,12,12,128,1,float16,fp8,0,0.05512319803237915
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,12,12,128,1,fp8,fp8,0,0.05472000241279602
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,12,1,128,1,float16,float16,0,0.05779359936714172
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,12,1,128,1,float16,fp8,0,0.04626719951629639
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,12,1,128,1,fp8,fp8,0,0.045612800121307376
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,12,2,128,1,float16,float16,0,0.06397119760513306
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,12,2,128,1,float16,fp8,0,0.045603200793266296
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,12,2,128,1,fp8,fp8,0,0.04567199945449829
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,12,4,128,1,float16,float16,0,0.08225439786911011
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,12,4,128,1,float16,fp8,0,0.046454399824142456
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,12,4,128,1,fp8,fp8,0,0.04642400145530701
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,12,12,128,1,float16,float16,0,0.08025280237197877
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,12,12,128,1,float16,fp8,0,0.03499679863452911
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,12,12,128,1,fp8,fp8,0,0.03497599959373474
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,12,1,128,1,float16,float16,0,0.040003201365470885
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,12,1,128,1,float16,fp8,0,0.03091199994087219
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,12,1,128,1,fp8,fp8,0,0.030939200520515443
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,12,2,128,1,float16,float16,0,0.04515039920806885
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,12,2,128,1,float16,fp8,0,0.030969598889350893
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,12,2,128,1,fp8,fp8,0,0.03115839958190918
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,12,4,128,1,float16,float16,0,0.050967997312545775
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,12,4,128,1,float16,fp8,0,0.03097440004348755
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,12,4,128,1,fp8,fp8,0,0.03107840120792389
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,12,12,128,1,float16,float16,0,0.049200001358985904
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,12,12,128,1,float16,fp8,0,0.02282239943742752
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,12,12,128,1,fp8,fp8,0,0.02274879962205887
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,12,1,128,1,float16,float16,0,0.03068000078201294
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,12,1,128,1,float16,fp8,0,0.020734399557113647
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,12,1,128,1,fp8,fp8,0,0.02059520035982132
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,12,2,128,1,float16,float16,0,0.030929601192474364
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,12,2,128,1,float16,fp8,0,0.020683200657367708
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,12,2,128,1,fp8,fp8,0,0.02083200067281723
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,12,4,128,1,float16,float16,0,0.03502399921417236
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,12,4,128,1,float16,fp8,0,0.020656000077724456
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,12,4,128,1,fp8,fp8,0,0.020771199464797975
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,12,12,128,1,float16,float16,0,0.037294399738311765
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,12,12,128,1,float16,fp8,0,0.02062239944934845
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,12,12,128,1,fp8,fp8,0,0.02062560021877289
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,12,1,128,1,float16,float16,0,0.02890079915523529
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,12,1,128,1,float16,fp8,0,0.01865759938955307
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,12,4,128,1,float16,fp8,0,0.018603199720382692
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,12,1,128,1,fp8,fp8,0,0.018619200587272643
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,12,2,128,1,float16,float16,0,0.028886398673057555
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,12,2,128,1,float16,fp8,0,0.018603199720382692
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,12,2,128,1,fp8,fp8,0,0.018801599740982056
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,12,4,128,1,float16,float16,0,0.030840000510215758
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,12,4,128,1,fp8,fp8,0,0.01863519996404648
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,12,12,128,1,float16,float16,0,0.03171519935131073
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,12,12,128,1,float16,fp8,0,0.018641600012779237
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,12,12,128,1,fp8,fp8,0,0.018592000007629395
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,12,1,128,1,float16,float16,0,0.02884800136089325
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,12,1,128,1,float16,fp8,0,0.018587200343608855
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,12,1,128,1,fp8,fp8,0,0.018644799292087556
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,12,2,128,1,float16,float16,0,0.02893120050430298
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,12,2,128,1,float16,fp8,0,0.018681600689888
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,12,2,128,1,fp8,fp8,0,0.018654400110244752
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,12,4,128,1,float16,float16,0,0.02922079861164093
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,12,4,128,1,float16,fp8,0,0.018695999681949616
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,12,4,128,1,fp8,fp8,0,0.018700799345970152
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,12,1,128,1,float16,float16,0,0.26840479373931886
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,12,1,128,1,float16,fp8,0,0.23353281021118164
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,12,1,128,1,fp8,fp8,0,0.2339103937149048
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,12,2,128,1,float16,float16,0,0.345796799659729
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,12,2,128,1,float16,fp8,0,0.23387041091918945
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,12,2,128,1,fp8,fp8,0,0.2336143970489502
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,12,4,128,1,float16,fp8,0,0.233953595161438
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,12,4,128,1,float16,float16,0,0.504088020324707
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,12,4,128,1,fp8,fp8,0,0.23417439460754394
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,12,12,128,1,float16,fp8,0,0.15879520177841186
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,12,12,128,1,fp8,fp8,0,0.15890079736709595
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,12,12,128,1,float16,float16,0,0.5778768062591553
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,12,1,128,1,float16,float16,0,0.14962079524993896
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,12,1,128,1,float16,fp8,0,0.12478079795837402
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,12,1,128,1,fp8,fp8,0,0.1251520037651062
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,12,2,128,1,float16,float16,0,0.1880336046218872
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,12,2,128,1,float16,fp8,0,0.12524319887161256
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,12,12,128,1,float16,float16,0,0.30283679962158205
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,12,2,128,1,fp8,fp8,0,0.12482880353927613
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,12,4,128,1,float16,float16,0,0.2649424076080322
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,12,4,128,1,float16,fp8,0,0.12508480548858641
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,12,4,128,1,fp8,fp8,0,0.12492640018463134
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,12,12,128,1,float16,fp8,0,0.08824319839477539
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,12,12,128,1,fp8,fp8,0,0.08794400095939636
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,12,1,128,1,float16,float16,0,0.08981760144233704
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,12,1,128,1,float16,fp8,0,0.06903679966926575
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,12,1,128,1,fp8,fp8,0,0.06840800046920777
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,12,2,128,1,float16,float16,0,0.10920159816741944
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,12,2,128,1,float16,fp8,0,0.06978880167007447
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,12,2,128,1,fp8,fp8,0,0.06982880234718322
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,12,4,128,1,float16,float16,0,0.14719200134277344
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,12,4,128,1,float16,fp8,0,0.07004640102386475
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,12,4,128,1,fp8,fp8,0,0.06979039907455445
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,12,12,128,1,float16,float16,0,0.1618783950805664
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,12,12,128,1,float16,fp8,0,0.04737760126590729
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,12,12,128,1,fp8,fp8,0,0.048163199424743654
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,12,1,128,1,float16,float16,0,0.05031520128250122
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,12,1,128,1,float16,fp8,0,0.03912639915943146
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,12,1,128,1,fp8,fp8,0,0.03928320109844208
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,12,2,128,1,float16,float16,0,0.05588799715042114
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,12,2,128,1,float16,fp8,0,0.039273598790168764
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,12,2,128,1,fp8,fp8,0,0.03921439945697784
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,12,4,128,1,float16,float16,0,0.07320640087127686
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,12,4,128,1,float16,fp8,0,0.039156800508499144
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,12,4,128,1,fp8,fp8,0,0.039192000031471254
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,12,12,128,1,float16,float16,0,0.07854560017585754
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,12,12,128,1,float16,fp8,0,0.031097599864006044
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,12,12,128,1,fp8,fp8,0,0.031092798709869383
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,12,1,128,1,float16,float16,0,0.03587520122528076
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,12,1,128,1,float16,fp8,0,0.026796799898147584
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,12,1,128,1,fp8,fp8,0,0.026862400770187377
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,12,2,128,1,float16,float16,0,0.04134880006313324
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,12,2,128,1,float16,fp8,0,0.02683199942111969
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,12,2,128,1,fp8,fp8,0,0.026868799328804018
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,12,4,128,1,float16,float16,0,0.04645920097827912
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,12,4,128,1,float16,fp8,0,0.026948800683021544
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,12,4,128,1,fp8,fp8,0,0.026872000098228453
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,12,12,128,1,float16,float16,0,0.04551520049571991
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,12,12,128,1,float16,fp8,0,0.020729599893093108
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,12,12,128,1,fp8,fp8,0,0.020630399882793426
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,12,1,128,1,float16,float16,0,0.026892799139022826
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,12,1,128,1,float16,fp8,0,0.016672000288963318
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,12,1,128,1,fp8,fp8,0,0.018540799617767334
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,12,2,128,1,float16,float16,0,0.02887519896030426
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,12,2,128,1,float16,fp8,0,0.018479999899864197
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,12,2,128,1,fp8,fp8,0,0.017956799268722533
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,12,4,128,1,float16,float16,0,0.03298400044441223
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,12,4,128,1,float16,fp8,0,0.018078400194644927
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,12,4,128,1,fp8,fp8,0,0.01860000044107437
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,12,1,128,1,fp8,fp8,0,0.01652639955282211
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,12,12,128,1,float16,float16,0,0.03517760038375854
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,12,12,128,1,float16,fp8,0,0.016641600430011748
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,12,12,128,1,fp8,fp8,0,0.016739200055599212
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,12,1,128,1,float16,float16,0,0.027020800113677978
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,12,1,128,1,float16,fp8,0,0.016497600078582763
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,12,2,128,1,float16,fp8,0,0.016569599509239197
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,12,2,128,1,float16,float16,0,0.026895999908447266
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,12,2,128,1,fp8,fp8,0,0.016607999801635742
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,12,4,128,1,float16,float16,0,0.026870399713516235
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,12,1,128,1,float16,fp8,0,0.014579200744628906
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,12,4,128,1,float16,fp8,0,0.016641600430011748
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,12,4,128,1,fp8,fp8,0,0.016603200137615202
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,12,12,128,1,float16,float16,0,0.028935998678207397
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,12,12,128,1,float16,fp8,0,0.016545599699020384
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,12,12,128,1,fp8,fp8,0,0.016539199650287627
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,12,1,128,1,float16,float16,0,0.025775998830795288
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,12,4,128,1,fp8,fp8,0,0.01454080045223236
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,12,1,128,1,fp8,fp8,0,0.015292799472808838
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,12,2,128,1,float16,float16,0,0.027076798677444457
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,12,2,128,1,float16,fp8,0,0.014691199362277984
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,12,2,128,1,fp8,fp8,0,0.015385599434375763
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,12,4,128,1,float16,float16,0,0.025697600841522217
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,12,4,128,1,float16,fp8,0,0.01488800048828125
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,12,12,128,1,float16,fp8,0,0.015323199331760406
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,12,12,128,1,float16,float16,0,0.024830399453639983
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,12,12,128,1,fp8,fp8,0,0.01573919951915741
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,12,1,128,1,float16,float16,0,0.024723200500011443
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,12,1,128,1,float16,fp8,0,0.014660799503326416
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,12,1,128,1,fp8,fp8,0,0.014716799557209014
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,12,2,128,1,float16,float16,0,0.02481119930744171
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,12,2,128,1,float16,fp8,0,0.014732800424098969
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,12,2,128,1,fp8,fp8,0,0.014790399372577668
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,12,4,128,1,float16,float16,0,0.02493920028209686
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,12,4,128,1,float16,fp8,0,0.014894400537014008
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,12,4,128,1,fp8,fp8,0,0.014696000516414643
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,12,1,128,1,float16,float16,0,0.1643440008163452
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,12,1,128,1,float16,fp8,0,0.13753600120544435
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,12,1,128,1,fp8,fp8,0,0.13767679929733276
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,12,2,128,1,float16,float16,0,0.20195040702819825
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,12,2,128,1,float16,fp8,0,0.13778079748153688
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,12,2,128,1,fp8,fp8,0,0.13758399486541747
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,12,4,128,1,float16,float16,0,0.27853760719299314
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,12,4,128,1,float16,fp8,0,0.1376415967941284
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,12,4,128,1,fp8,fp8,0,0.1374400019645691
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,12,12,128,1,float16,fp8,0,0.09240639805793763
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,12,12,128,1,float16,float16,0,0.30619039535522463
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,12,12,128,1,fp8,fp8,0,0.0923088014125824
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,12,1,128,1,float16,float16,0,0.09524319767951965
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,12,1,128,1,float16,fp8,0,0.0739952027797699
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,12,1,128,1,fp8,fp8,0,0.07399680018424988
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,12,2,128,1,float16,float16,0,0.11457439661026
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,12,2,128,1,float16,fp8,0,0.07393119931221008
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,12,2,128,1,fp8,fp8,0,0.07397279739379883
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,12,12,128,1,fp8,fp8,0,0.0514303982257843
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,12,4,128,1,float16,float16,0,0.15213279724121093
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,12,4,128,1,float16,fp8,0,0.07447839975357055
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,12,4,128,1,fp8,fp8,0,0.07520319819450379
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,12,12,128,1,float16,float16,0,0.16557760238647462
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,12,12,128,1,float16,fp8,0,0.051425600051879884
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,12,1,128,1,float16,float16,0,0.05350559949874878
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,12,4,128,1,float16,fp8,0,0.043075200915336606
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,12,1,128,1,float16,fp8,0,0.042847999930381776
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,12,1,128,1,fp8,fp8,0,0.042824000120162964
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,12,2,128,1,float16,float16,0,0.06219519972801209
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,12,2,128,1,float16,fp8,0,0.04266560077667236
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,12,2,128,1,fp8,fp8,0,0.04294080138206482
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,12,4,128,1,float16,float16,0,0.08216639757156372
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,12,4,128,1,fp8,fp8,0,0.0430512011051178
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,12,2,128,1,float16,float16,0,0.03914879858493805
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,12,12,128,1,float16,float16,0,0.07822719812393189
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,12,12,128,1,float16,fp8,0,0.029366400837898255
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,12,2,128,1,fp8,fp8,0,0.0247871994972229
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,12,12,128,1,fp8,fp8,0,0.028907200694084166
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,12,4,128,1,float16,float16,0,0.04536480009555817
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,12,1,128,1,float16,float16,0,0.034944000840187076
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,12,1,128,1,float16,fp8,0,0.024872000515460967
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,12,1,128,1,fp8,fp8,0,0.024846400320529937
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,12,2,128,1,float16,fp8,0,0.024903999269008638
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,12,4,128,1,float16,fp8,0,0.024911999702453613
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,12,4,128,1,fp8,fp8,0,0.024830399453639983
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,12,12,128,1,float16,float16,0,0.04733439981937408
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,12,12,128,1,float16,fp8,0,0.020718400180339814
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,12,12,128,1,fp8,fp8,0,0.020652799308300017
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,12,1,128,1,float16,float16,0,0.028907200694084166
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,12,1,128,1,float16,fp8,0,0.01865759938955307
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,12,1,128,1,fp8,fp8,0,0.01865279972553253
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,12,2,128,1,float16,float16,0,0.028870400786399842
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,12,2,128,1,float16,fp8,0,0.018598400056362152
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,12,2,128,1,fp8,fp8,0,0.018796800076961516
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,12,4,128,1,float16,float16,0,0.03387520015239716
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,12,4,128,1,float16,fp8,0,0.018572799861431122
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,12,4,128,1,fp8,fp8,0,0.01871519982814789
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,12,12,128,1,float16,float16,0,0.03129920065402984
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,12,12,128,1,float16,fp8,0,0.014567999541759491
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,12,12,128,1,fp8,fp8,0,0.013072000443935394
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,12,1,128,1,float16,float16,0,0.022910399734973906
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,12,1,128,1,float16,fp8,0,0.012559999525547028
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,12,1,128,1,fp8,fp8,0,0.012539200484752655
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,12,2,128,1,float16,float16,0,0.022838400304317476
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,12,12,128,1,float16,fp8,0,0.012508800625801087
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,12,2,128,1,float16,fp8,0,0.012510399520397186
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,12,2,128,1,fp8,fp8,0,0.012548799812793731
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,12,4,128,1,float16,float16,0,0.024715200066566467
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,12,4,128,1,float16,fp8,0,0.012438400089740754
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,12,4,128,1,fp8,fp8,0,0.012531200051307678
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,12,12,128,1,float16,float16,0,0.024860799312591553
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,12,12,128,1,fp8,fp8,0,0.012433599680662155
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,12,1,128,1,float16,float16,0,0.0226623997092247
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,12,1,128,1,float16,fp8,0,0.011297599971294403
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,12,1,128,1,fp8,fp8,0,0.012408000230789185
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,12,2,128,1,float16,float16,0,0.02279680073261261
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,12,2,128,1,float16,fp8,0,0.01149120032787323
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,12,2,128,1,fp8,fp8,0,0.012412799894809723
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,12,4,128,1,float16,float16,0,0.022675199806690215
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,12,4,128,1,float16,fp8,0,0.012563200294971466
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,12,1,128,1,float16,fp8,0,0.010460799932479859
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,12,4,128,1,fp8,fp8,0,0.01143999993801117
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,12,12,128,1,float16,float16,0,0.020785599946975708
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,12,12,128,1,float16,fp8,0,0.012449599802494049
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,12,12,128,1,fp8,fp8,0,0.011407999694347382
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,12,1,128,1,float16,float16,0,0.02065120041370392
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,12,1,128,1,fp8,fp8,0,0.010599999874830245
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,12,2,128,1,float16,float16,0,0.020656000077724456
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,12,2,128,1,float16,fp8,0,0.010480000078678131
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,12,2,128,1,fp8,fp8,0,0.010499200224876404
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,12,4,128,1,float16,float16,0,0.020742399990558623
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,12,4,128,1,float16,fp8,0,0.01149279996752739
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,12,4,128,1,fp8,fp8,0,0.010652799904346467
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,12,12,128,1,float16,float16,0,0.02064319998025894
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,12,2,128,1,float16,fp8,0,0.010552000254392624
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,12,12,128,1,float16,fp8,0,0.010598400235176086
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,12,12,128,1,fp8,fp8,0,0.01053600013256073
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,12,1,128,1,float16,float16,0,0.020744000375270844
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,12,1,128,1,float16,fp8,0,0.010507199913263321
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,12,1,128,1,fp8,fp8,0,0.01061599999666214
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,12,2,128,1,float16,float16,0,0.020729599893093108
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,12,2,128,1,fp8,fp8,0,0.010577599704265594
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,12,4,128,1,float16,float16,0,0.020553599298000335
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,12,4,128,1,float16,fp8,0,0.010416000336408614
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,12,4,128,1,fp8,fp8,0,0.01043199971318245
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,12,1,128,1,float16,float16,0,0.12265119552612305
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,12,1,128,1,float16,fp8,0,0.09866880178451538
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,12,1,128,1,fp8,fp8,0,0.09921600222587586
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,12,2,128,1,float16,float16,0,0.14215680360794067
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,12,2,128,1,float16,fp8,0,0.09912480115890503
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,12,2,128,1,fp8,fp8,0,0.09918400049209594
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,12,4,128,1,float16,float16,0,0.18071520328521729
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,12,4,128,1,float16,fp8,0,0.09992480278015137
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,12,4,128,1,fp8,fp8,0,0.09896479845046997
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,12,12,128,1,float16,float16,0,0.17821120023727416
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,12,12,128,1,float16,fp8,0,0.06373119950294495
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,12,12,128,1,fp8,fp8,0,0.063755202293396
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,12,1,128,1,float16,float16,0,0.06720319986343384
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,12,1,128,1,float16,fp8,0,0.05391680002212525
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,12,1,128,1,fp8,fp8,0,0.05508319735527038
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,12,2,128,1,float16,float16,0,0.07517439723014832
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,12,2,128,1,float16,fp8,0,0.05434079766273499
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,12,2,128,1,fp8,fp8,0,0.05424799919128418
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,12,4,128,1,float16,float16,0,0.09628959894180297
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,12,4,128,1,float16,fp8,0,0.054611200094223024
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,12,1,128,1,fp8,fp8,0,0.031799998879432675
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,12,4,128,1,fp8,fp8,0,0.05419679880142212
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,12,12,128,1,float16,float16,0,0.0859008014202118
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,12,12,128,1,float16,fp8,0,0.037041598558425905
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,12,12,128,1,fp8,fp8,0,0.03691360056400299
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,12,1,128,1,float16,float16,0,0.042716801166534424
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,12,1,128,1,float16,fp8,0,0.032795199751853944
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,12,2,128,1,float16,float16,0,0.04736000001430511
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,12,2,128,1,float16,fp8,0,0.031814399361610415
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,12,2,128,1,fp8,fp8,0,0.03270240128040314
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,12,4,128,1,float16,float16,0,0.05347679853439331
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,12,4,128,1,float16,fp8,0,0.03290559947490692
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,12,4,128,1,fp8,fp8,0,0.03282560110092163
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,12,12,128,1,float16,float16,0,0.049425598978996274
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,12,12,128,1,float16,fp8,0,0.022732800245285033
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,12,12,128,1,fp8,fp8,0,0.022700800001621245
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,12,1,128,1,float16,float16,0,0.030870398879051207
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,12,1,128,1,float16,fp8,0,0.020521600544452668
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,12,1,128,1,fp8,fp8,0,0.020764799416065217
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,12,2,128,1,float16,float16,0,0.031113600730895995
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,12,2,128,1,float16,fp8,0,0.020636799931526183
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,12,2,128,1,fp8,fp8,0,0.02075680047273636
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,12,4,128,1,float16,float16,0,0.03517760038375854
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,12,4,128,1,float16,fp8,0,0.02083680033683777
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,12,4,128,1,fp8,fp8,0,0.020611199736595153
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,12,12,128,1,float16,float16,0,0.03479839861392975
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,12,12,128,1,float16,fp8,0,0.016539199650287627
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,12,12,128,1,fp8,fp8,0,0.016627199947834015
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,12,1,128,1,float16,float16,0,0.02683520019054413
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,12,1,128,1,float16,fp8,0,0.014603200554847717
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,12,1,128,1,fp8,fp8,0,0.014580799639225006
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,12,2,128,1,float16,float16,0,0.026849600672721862
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,12,2,128,1,float16,fp8,0,0.014579200744628906
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,12,2,128,1,fp8,fp8,0,0.014710399508476257
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,12,4,128,1,float16,float16,0,0.026894399523735048
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,12,4,128,1,float16,fp8,0,0.014620800316333771
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,12,4,128,1,fp8,fp8,0,0.014510400593280792
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,12,12,128,1,float16,float16,0,0.024820800125598907
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,12,2,128,1,float16,float16,0,0.02276960015296936
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,12,12,128,1,float16,fp8,0,0.012430399656295776
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,12,12,128,1,fp8,fp8,0,0.012583999335765839
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,12,1,128,1,float16,float16,0,0.020763200521469117
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,12,1,128,1,float16,fp8,0,0.011030399799346923
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,12,1,128,1,fp8,fp8,0,0.010596799850463866
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,12,2,128,1,float16,fp8,0,0.010731200128793717
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,12,12,128,1,float16,fp8,0,0.010523200035095215
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,12,2,128,1,fp8,fp8,0,0.010460799932479859
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,12,4,128,1,float16,float16,0,0.02267040014266968
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,12,4,128,1,float16,fp8,0,0.010516799986362457
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,12,4,128,1,fp8,fp8,0,0.010499200224876404
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,12,12,128,1,float16,float16,0,0.020744000375270844
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,12,12,128,1,fp8,fp8,0,0.010401599854230881
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,12,1,128,1,float16,float16,0,0.02066880017518997
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,12,1,128,1,float16,fp8,0,0.010500799864530563
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,12,2,128,1,float16,float16,0,0.020628799498081208
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,12,1,128,1,fp8,fp8,0,0.010521599650382995
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,12,2,128,1,float16,fp8,0,0.0103472001850605
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,12,2,128,1,fp8,fp8,0,0.010422399640083313
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,12,4,128,1,float16,float16,0,0.02070080041885376
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,12,1,128,1,float16,float16,0,0.018803200125694274
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,12,4,128,1,float16,fp8,0,0.010478399693965912
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,12,4,128,1,fp8,fp8,0,0.01040000021457672
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,12,12,128,1,float16,float16,0,0.02065120041370392
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,12,12,128,1,float16,fp8,0,0.010487999767065048
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,12,12,128,1,fp8,fp8,0,0.010518400371074677
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,12,1,128,1,float16,fp8,0,0.01040000021457672
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,12,1,128,1,fp8,fp8,0,0.010422399640083313
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,12,2,128,1,float16,float16,0,0.018598400056362152
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,12,2,128,1,float16,fp8,0,0.01085280030965805
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,12,2,128,1,fp8,fp8,0,0.010520000010728836
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,12,4,128,1,float16,float16,0,0.01960960030555725
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,12,4,128,1,float16,fp8,0,0.01040000021457672
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,12,4,128,1,fp8,fp8,0,0.010465600341558457
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,12,12,128,1,float16,float16,0,0.01871040016412735
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,12,2,128,1,float16,fp8,0,0.010388799756765366
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,12,12,128,1,float16,fp8,0,0.010463999956846238
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,12,12,128,1,fp8,fp8,0,0.01053600013256073
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,12,1,128,1,float16,float16,0,0.01879040002822876
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,12,1,128,1,float16,fp8,0,0.010396800190210342
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,12,1,128,1,fp8,fp8,0,0.010358399897813796
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,12,2,128,1,float16,float16,0,0.018662400543689728
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,12,2,128,1,fp8,fp8,0,0.010440000146627427
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,12,4,128,1,float16,float16,0,0.019089600443840025
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,12,4,128,1,float16,fp8,0,0.010387200117111205
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,12,4,128,1,fp8,fp8,0,0.010436800122261048
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,12,1,128,1,float16,float16,0,0.10133600234985352
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,12,1,128,1,float16,fp8,0,0.081768000125885
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,12,1,128,1,fp8,fp8,0,0.08131679892539978
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,12,2,128,1,float16,float16,0,0.10568959712982177
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,12,2,128,1,float16,fp8,0,0.08218240141868591
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,12,2,128,1,fp8,fp8,0,0.08188639879226685
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,12,4,128,1,float16,float16,0,0.12782880067825317
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,12,4,128,1,float16,fp8,0,0.08208320140838624
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,12,4,128,1,fp8,fp8,0,0.08132479786872863
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,12,12,128,1,float16,float16,0,0.1008687973022461
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,12,12,128,1,float16,fp8,0,0.049358400702476504
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,12,12,128,1,fp8,fp8,0,0.0494623988866806
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,12,1,128,1,float16,float16,0,0.05757920145988464
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,12,1,128,1,float16,fp8,0,0.04541600048542023
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,12,1,128,1,fp8,fp8,0,0.04541279971599579
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,12,2,128,1,float16,float16,0,0.06388480067253113
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,12,2,128,1,float16,fp8,0,0.045270401239395144
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,12,2,128,1,fp8,fp8,0,0.045342400670051575
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,12,4,128,1,float16,float16,0,0.06822239756584167
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,12,4,128,1,float16,fp8,0,0.04527359902858734
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,12,4,128,1,fp8,fp8,0,0.04535360038280487
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,12,12,128,1,float16,float16,0,0.05783680081367493
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,12,12,128,1,float16,fp8,0,0.030372801423072814
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,12,12,128,1,fp8,fp8,0,0.030475199222564697
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,12,1,128,1,float16,float16,0,0.039241600036621097
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,12,1,128,1,float16,fp8,0,0.027115198969841003
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,12,1,128,1,fp8,fp8,0,0.027883198857307435
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,12,2,128,1,float16,float16,0,0.03914400041103363
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,12,2,128,1,float16,fp8,0,0.027025601267814635
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,12,2,128,1,fp8,fp8,0,0.02807359993457794
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,12,4,128,1,float16,float16,0,0.044284799695014955
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,12,1,128,1,float16,float16,0,0.028947201371192933
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,12,4,128,1,float16,fp8,0,0.02707360088825226
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,12,4,128,1,fp8,fp8,0,0.027139198780059815
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,12,12,128,1,float16,float16,0,0.0389167994260788
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,12,12,128,1,float16,fp8,0,0.018673600256443025
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,12,12,128,1,fp8,fp8,0,0.01870879977941513
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,12,1,128,1,float16,fp8,0,0.018348799645900728
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,12,1,128,1,fp8,fp8,0,0.017550399899482726
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,12,2,128,1,float16,float16,0,0.02901119887828827
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,12,2,128,1,float16,fp8,0,0.018302400410175324
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,12,2,128,1,fp8,fp8,0,0.01767359972000122
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,12,4,128,1,float16,float16,0,0.02943519949913025
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,12,4,128,1,float16,fp8,0,0.01717440038919449
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,12,4,128,1,fp8,fp8,0,0.018670399487018586
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,12,12,128,1,float16,float16,0,0.028891199827194215
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,12,12,128,1,float16,fp8,0,0.014481599628925323
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,12,12,128,1,fp8,fp8,0,0.014475199580192565
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,12,1,128,1,float16,float16,0,0.024766400456428528
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,12,1,128,1,float16,fp8,0,0.014552000164985656
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,12,1,128,1,fp8,fp8,0,0.013711999356746673
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,12,2,128,1,float16,float16,0,0.024804799258708952
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,12,2,128,1,float16,fp8,0,0.013672000169754029
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,12,2,128,1,fp8,fp8,0,0.014486399292945863
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,12,4,128,1,float16,float16,0,0.025227200984954835
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,12,4,128,1,float16,fp8,0,0.014483200013637542
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,12,4,128,1,fp8,fp8,0,0.013708800077438354
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,12,2,128,1,float16,float16,0,0.02072640061378479
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,12,12,128,1,float16,float16,0,0.02078240066766739
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,12,12,128,1,float16,fp8,0,0.010427200049161912
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,12,12,128,1,fp8,fp8,0,0.010606399923563003
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,12,1,128,1,float16,float16,0,0.020660799741744996
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,12,1,128,1,float16,fp8,0,0.01048320010304451
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,12,1,128,1,fp8,fp8,0,0.010407999902963639
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,12,2,128,1,float16,fp8,0,0.010425599664449692
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,12,2,128,1,fp8,fp8,0,0.010411199927330018
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,12,4,128,1,float16,float16,0,0.020716799795627593
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,12,4,128,1,float16,fp8,0,0.010401599854230881
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,12,4,128,1,fp8,fp8,0,0.010499200224876404
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,12,12,128,1,float16,float16,0,0.01881760060787201
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,12,12,128,1,float16,fp8,0,0.010454399883747101
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,12,12,128,1,fp8,fp8,0,0.010417599976062775
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,12,1,128,1,float16,float16,0,0.018718400597572328
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,12,1,128,1,float16,fp8,0,0.010366400331258773
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,12,1,128,1,fp8,fp8,0,0.010824000090360641
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,12,2,128,1,float16,float16,0,0.01871040016412735
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,12,2,128,1,float16,fp8,0,0.010460799932479859
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,12,2,128,1,fp8,fp8,0,0.010388799756765366
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,12,4,128,1,float16,float16,0,0.018596799671649934
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,12,4,128,1,float16,fp8,0,0.010391999781131745
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,12,4,128,1,fp8,fp8,0,0.009467200189828873
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,12,12,128,1,float16,float16,0,0.01860159933567047
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,12,12,128,1,float16,fp8,0,0.010414399951696397
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,12,12,128,1,fp8,fp8,0,0.009455999732017517
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,12,1,128,1,float16,float16,0,0.01868479996919632
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,12,1,128,1,float16,fp8,0,0.008619199693202972
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,12,1,128,1,fp8,fp8,0,0.008606400340795517
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,12,2,128,1,float16,float16,0,0.01863359957933426
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,12,2,128,1,float16,fp8,0,0.008736000210046769
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,12,2,128,1,fp8,fp8,0,0.009332799911499023
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,12,4,128,1,float16,float16,0,0.01868640035390854
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,12,4,128,1,float16,fp8,0,0.008780799806118011
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,12,4,128,1,fp8,fp8,0,0.00952960029244423
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,12,12,128,1,float16,float16,0,0.018607999384403228
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,12,12,128,1,float16,fp8,0,0.008822400122880936
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,12,12,128,1,fp8,fp8,0,0.008886399865150451
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,12,1,128,1,float16,float16,0,0.01709440052509308
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,12,1,128,1,float16,fp8,0,0.008452799916267396
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,12,1,128,1,fp8,fp8,0,0.009567999839782714
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,12,2,128,1,float16,float16,0,0.018603199720382692
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,12,2,128,1,float16,fp8,0,0.008524800091981888
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,12,2,128,1,fp8,fp8,0,0.009799999743700027
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,12,4,128,1,float16,float16,0,0.018673600256443025
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,12,4,128,1,float16,fp8,0,0.010014399886131287
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,12,4,128,1,fp8,fp8,0,0.009712000191211701
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,12,1,128,1,float16,float16,0,0.09455999732017517
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,12,1,128,1,float16,fp8,0,0.07309280037879944
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,12,1,128,1,fp8,fp8,0,0.0741375982761383
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,12,2,128,1,float16,float16,0,0.09893119931221009
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,12,2,128,1,float16,fp8,0,0.07326239943504334
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,12,2,128,1,fp8,fp8,0,0.07365440130233765
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,12,4,128,1,float16,float16,0,0.10543040037155152
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,12,4,128,1,float16,fp8,0,0.07430880069732666
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,12,4,128,1,fp8,fp8,0,0.07313920259475708
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,12,12,128,1,float16,float16,0,0.07592960000038147
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,12,12,128,1,float16,fp8,0,0.043505600094795226
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,12,12,128,1,fp8,fp8,0,0.04320000112056732
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,12,1,128,1,float16,float16,0,0.058006399869918825
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,12,1,128,1,float16,fp8,0,0.041403201222419736
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,12,1,128,1,fp8,fp8,0,0.04131839871406555
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,12,2,128,1,float16,float16,0,0.05783680081367493
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,12,2,128,1,float16,fp8,0,0.041643199324607846
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,12,2,128,1,fp8,fp8,0,0.041510400176048276
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,12,4,128,1,float16,float16,0,0.06295199990272522
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,12,4,128,1,float16,fp8,0,0.04131839871406555
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,12,4,128,1,fp8,fp8,0,0.04177759885787964
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,12,12,128,1,float16,float16,0,0.04733920097351074
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,12,12,128,1,float16,fp8,0,0.027112001180648805
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,12,12,128,1,fp8,fp8,0,0.026756799221038817
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,12,1,128,1,float16,float16,0,0.039094400405883786
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,12,1,128,1,float16,fp8,0,0.025017601251602174
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,12,1,128,1,fp8,fp8,0,0.02563839852809906
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,12,2,128,1,float16,float16,0,0.03919360041618347
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,12,2,128,1,float16,fp8,0,0.02484479993581772
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,12,2,128,1,fp8,fp8,0,0.024822400510311128
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,12,4,128,1,float16,float16,0,0.03940800130367279
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,12,4,128,1,float16,fp8,0,0.025011199712753295
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,12,4,128,1,fp8,fp8,0,0.025113600492477416
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,12,12,128,1,float16,float16,0,0.03302719891071319
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,12,12,128,1,float16,fp8,0,0.016708800196647645
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,12,12,128,1,fp8,fp8,0,0.016596800088882445
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,12,1,128,1,float16,float16,0,0.02893120050430298
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,12,1,128,1,float16,fp8,0,0.01703200042247772
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,12,1,128,1,fp8,fp8,0,0.016543999314308167
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,12,2,128,1,float16,float16,0,0.02880159914493561
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,12,2,128,1,float16,fp8,0,0.01669919937849045
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,12,2,128,1,fp8,fp8,0,0.016604800522327424
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,12,4,128,1,float16,float16,0,0.028918400406837463
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,12,4,128,1,float16,fp8,0,0.016648000478744505
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,12,1,128,1,fp8,fp8,0,0.012563200294971466
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,12,4,128,1,fp8,fp8,0,0.01674239933490753
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,12,12,128,1,float16,float16,0,0.023907199501991272
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,12,12,128,1,float16,fp8,0,0.013174399733543396
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,12,12,128,1,fp8,fp8,0,0.012689599394798278
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,12,1,128,1,float16,float16,0,0.023296000063419343
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,12,1,128,1,float16,fp8,0,0.012539200484752655
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,12,2,128,1,float16,float16,0,0.023048000037670137
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,12,2,128,1,float16,fp8,0,0.0124719999730587
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,12,2,128,1,fp8,fp8,0,0.012558400630950928
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,12,4,128,1,float16,float16,0,0.024695999920368195
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,12,4,128,1,float16,fp8,0,0.012683199346065521
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,12,4,128,1,fp8,fp8,0,0.012547199428081513
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,12,12,128,1,float16,float16,0,0.018987199664115904
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,12,12,128,1,fp8,fp8,0,0.010441599786281586
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,12,12,128,1,float16,fp8,0,0.010489600151777268
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,12,1,128,1,float16,float16,0,0.01866399943828583
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,12,1,128,1,float16,fp8,0,0.010494399815797806
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,12,1,128,1,fp8,fp8,0,0.010387200117111205
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,12,2,128,1,float16,float16,0,0.018779200315475465
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,12,12,128,1,float16,fp8,0,0.010380800068378448
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,12,2,128,1,float16,fp8,0,0.01048320010304451
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,12,12,128,1,fp8,fp8,0,0.01043360009789467
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,12,2,128,1,fp8,fp8,0,0.010639999806880952
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,12,4,128,1,float16,float16,0,0.019864000380039215
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,12,2,128,1,float16,float16,0,0.018747200071811677
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,12,4,128,1,float16,fp8,0,0.0104032002389431
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,12,4,128,1,fp8,fp8,0,0.010393600165843963
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,12,12,128,1,float16,float16,0,0.018692800402641298
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,12,1,128,1,float16,float16,0,0.01866399943828583
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,12,1,128,1,float16,fp8,0,0.010366400331258773
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,12,1,128,1,fp8,fp8,0,0.009505599737167358
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,12,2,128,1,float16,fp8,0,0.009593600034713745
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,12,2,128,1,fp8,fp8,0,0.009483200311660767
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,12,4,128,1,float16,float16,0,0.01876640021800995
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,12,4,128,1,float16,fp8,0,0.01029760017991066
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,12,4,128,1,fp8,fp8,0,0.008564800024032593
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,12,12,128,1,float16,float16,0,0.018699200451374055
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,12,12,128,1,float16,fp8,0,0.010438399761915207
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,12,12,128,1,fp8,fp8,0,0.009726399928331375
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,12,4,128,1,float16,float16,0,0.018668800592422485
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,12,1,128,1,float16,float16,0,0.018619200587272643
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,12,1,128,1,float16,fp8,0,0.01008159965276718
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,12,1,128,1,fp8,fp8,0,0.008737599849700928
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,12,2,128,1,float16,float16,0,0.018195199966430663
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,12,2,128,1,float16,fp8,0,0.008667200058698653
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,12,2,128,1,fp8,fp8,0,0.010361599922180175
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,12,4,128,1,float16,fp8,0,0.008683200180530547
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,12,4,128,1,fp8,fp8,0,0.00851840004324913
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,12,12,128,1,float16,float16,0,0.018614399433135986
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,12,12,128,1,float16,fp8,0,0.010080000013113022
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,12,12,128,1,fp8,fp8,0,0.008798400312662125
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,12,1,128,1,float16,float16,0,0.016735999286174773
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,12,1,128,1,float16,fp8,0,0.008459199965000153
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,12,1,128,1,fp8,fp8,0,0.00896959975361824
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,12,2,128,1,float16,float16,0,0.01789119988679886
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,12,2,128,1,float16,fp8,0,0.008460800349712371
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,12,2,128,1,fp8,fp8,0,0.009364800155162811
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,12,4,128,1,float16,float16,0,0.018681600689888
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,12,4,128,1,float16,fp8,0,0.008931200206279754
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,12,4,128,1,fp8,fp8,0,0.008550400286912918
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,8,1,128,1,float16,fp8,0,2.9283599853515625
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,8,1,128,1,fp8,fp8,0,2.9318159103393553
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,8,1,128,1,float16,float16,0,3.6479297637939454
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,8,2,128,1,float16,fp8,0,2.9243663787841796
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,8,2,128,1,fp8,fp8,0,2.9230672836303713
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,8,2,128,1,float16,float16,0,3.6475887298583984
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,8,4,128,1,float16,fp8,0,2.9233503341674805
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,8,4,128,1,float16,float16,0,4.083334350585938
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,8,8,128,1,fp8,fp8,0,1.5516127586364745
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,8,8,128,1,float16,fp8,0,1.902662467956543
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,8,1,128,1,float16,float16,0,1.7771263122558594
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,8,8,128,1,float16,float16,0,2.3559728622436524
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,8,1,128,1,fp8,fp8,0,1.5247008323669433
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,8,4,128,1,fp8,fp8,0,2.9393487930297852
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,8,1,128,1,float16,fp8,0,1.906153678894043
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,8,2,128,1,float16,float16,0,2.023788833618164
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,8,2,128,1,float16,fp8,0,1.5271856307983398
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,8,2,128,1,fp8,fp8,0,1.7357599258422851
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,8,4,128,1,float16,fp8,0,1.5832159996032715
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,8,8,128,1,float16,float16,0,1.2522303581237793
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,8,4,128,1,fp8,fp8,0,1.5255743980407714
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,8,4,128,1,float16,float16,0,2.0455583572387694
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,8,8,128,1,float16,fp8,0,0.9717887878417969
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,8,8,128,1,fp8,fp8,0,0.8295616149902344
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,8,1,128,1,float16,float16,0,0.9501759529113769
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,8,1,128,1,float16,fp8,0,0.8309696197509766
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,8,1,128,1,fp8,fp8,0,0.8353952407836914
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,8,2,128,1,float16,float16,0,0.9869711875915528
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,8,2,128,1,float16,fp8,0,0.917950439453125
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,8,2,128,1,fp8,fp8,0,0.9499648094177247
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,8,4,128,1,float16,float16,0,1.0618176460266113
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,8,4,128,1,float16,fp8,0,0.8631936073303222
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,8,8,128,1,float16,float16,0,0.6805136203765869
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,8,4,128,1,fp8,fp8,0,0.8272895812988281
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,8,8,128,1,float16,fp8,0,0.48430719375610354
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,8,8,128,1,fp8,fp8,0,0.5129327774047852
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,8,1,128,1,float16,float16,0,0.5526415824890136
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,8,1,128,1,float16,fp8,0,0.48159360885620117
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,8,1,128,1,fp8,fp8,0,0.487718391418457
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,8,2,128,1,float16,float16,0,0.5503007888793945
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,8,2,128,1,float16,fp8,0,0.4757664203643799
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,8,4,128,1,float16,float16,0,0.5957776069641113
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,8,2,128,1,fp8,fp8,0,0.47730240821838377
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,8,4,128,1,float16,fp8,0,0.516868782043457
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,8,4,128,1,fp8,fp8,0,0.47555041313171387
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,8,1,128,1,float16,fp8,0,1.7339616775512696
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,8,1,128,1,float16,float16,0,2.0251888275146483
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,8,1,128,1,fp8,fp8,0,1.7346160888671875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,8,2,128,1,float16,fp8,0,1.729470443725586
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,8,2,128,1,fp8,fp8,0,1.7331615447998048
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,8,2,128,1,float16,float16,0,2.140127944946289
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,8,4,128,1,float16,fp8,0,1.7330720901489258
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,8,4,128,1,float16,float16,0,2.402599906921387
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,8,8,128,1,float16,fp8,0,0.9160528182983398
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,8,8,128,1,fp8,fp8,0,0.9169039726257324
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,8,1,128,1,float16,float16,0,1.0585200309753418
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,8,1,128,1,fp8,fp8,0,0.9150959968566894
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,8,8,128,1,float16,float16,0,1.6787696838378907
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,8,1,128,1,float16,fp8,0,1.0352800369262696
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,8,4,128,1,fp8,fp8,0,1.7350255966186523
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,8,2,128,1,float16,fp8,0,0.9149120330810547
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,8,2,128,1,float16,float16,0,1.1571328163146972
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,8,2,128,1,fp8,fp8,0,0.914094352722168
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,8,4,128,1,float16,fp8,0,0.9152447700500488
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,8,8,128,1,float16,fp8,0,0.5087584018707275
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,8,8,128,1,float16,float16,0,0.7945424079895019
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,8,4,128,1,fp8,fp8,0,1.0106351852416993
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,8,4,128,1,float16,float16,0,1.315993595123291
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,8,1,128,1,fp8,fp8,0,0.5048704147338867
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,8,8,128,1,fp8,fp8,0,0.508070421218872
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,8,1,128,1,float16,float16,0,0.6217663764953614
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,8,1,128,1,float16,fp8,0,0.5056047916412354
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,8,2,128,1,float16,float16,0,0.6034687995910645
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,8,2,128,1,float16,fp8,0,0.5080639839172363
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,8,2,128,1,fp8,fp8,0,0.5136256217956543
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,8,4,128,1,float16,float16,0,0.6956592082977295
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,8,4,128,1,float16,fp8,0,0.505731201171875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,8,4,128,1,fp8,fp8,0,0.5062607765197754
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,8,8,128,1,float16,float16,0,0.46033759117126466
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,8,8,128,1,float16,fp8,0,0.300932788848877
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,8,8,128,1,fp8,fp8,0,0.29915039539337157
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,8,1,128,1,float16,float16,0,0.32718079090118407
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,8,1,128,1,float16,fp8,0,0.3012592077255249
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,8,1,128,1,fp8,fp8,0,0.3128848075866699
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,8,2,128,1,float16,float16,0,0.33611040115356444
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,8,2,128,1,float16,fp8,0,0.29712638854980467
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,8,2,128,1,fp8,fp8,0,0.29856960773468016
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,8,4,128,1,float16,float16,0,0.37479519844055176
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,8,4,128,1,float16,fp8,0,0.2979327917098999
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,8,4,128,1,fp8,fp8,0,0.2991408109664917
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,8,1,128,1,float16,fp8,0,1.2570672035217285
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,8,1,128,1,float16,float16,0,1.4624336242675782
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,8,1,128,1,fp8,fp8,0,1.2515024185180663
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,8,2,128,1,float16,fp8,0,1.2532032012939454
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,8,2,128,1,float16,float16,0,1.5606143951416016
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,8,2,128,1,fp8,fp8,0,1.2498895645141601
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,8,4,128,1,float16,fp8,0,1.2519696235656739
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,8,8,128,1,float16,fp8,0,0.6690576076507568
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,8,8,128,1,fp8,fp8,0,0.6699711799621582
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,8,4,128,1,float16,float16,0,1.753980827331543
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,8,4,128,1,fp8,fp8,0,1.2513168334960938
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,8,8,128,1,float16,float16,0,1.1828415870666504
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,8,1,128,1,float16,fp8,0,0.6668240070343018
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,8,1,128,1,float16,float16,0,0.7980224132537842
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,8,1,128,1,fp8,fp8,0,0.6660272121429444
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,8,2,128,1,float16,float16,0,0.8127663612365723
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,8,2,128,1,float16,fp8,0,0.6683375835418701
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,8,2,128,1,fp8,fp8,0,0.6911856174468994
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,8,4,128,1,float16,fp8,0,0.7430607795715332
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,8,4,128,1,float16,float16,0,0.9144751548767089
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,8,8,128,1,float16,fp8,0,0.3764159917831421
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,8,4,128,1,fp8,fp8,0,0.6673967838287354
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,8,1,128,1,fp8,fp8,0,0.38340959548950193
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,8,8,128,1,float16,float16,0,0.6179168224334717
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,8,8,128,1,fp8,fp8,0,0.3778496026992798
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,8,1,128,1,float16,float16,0,0.4188191890716553
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,8,1,128,1,float16,fp8,0,0.387609601020813
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,8,2,128,1,float16,float16,0,0.4465775966644287
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,8,2,128,1,float16,fp8,0,0.37416958808898926
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,8,2,128,1,fp8,fp8,0,0.3740000009536743
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,8,4,128,1,float16,float16,0,0.5034607887268067
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,8,4,128,1,float16,fp8,0,0.37519359588623047
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,8,4,128,1,fp8,fp8,0,0.37573919296264646
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,8,8,128,1,float16,float16,0,0.3473423957824707
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,8,8,128,1,float16,fp8,0,0.2316864013671875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,8,8,128,1,fp8,fp8,0,0.23115038871765137
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,8,1,128,1,float16,float16,0,0.25109119415283204
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,8,1,128,1,float16,fp8,0,0.2327631950378418
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,8,1,128,1,fp8,fp8,0,0.2306976079940796
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,8,2,128,1,float16,float16,0,0.26048479080200193
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,8,2,128,1,float16,fp8,0,0.23218879699707032
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,8,2,128,1,fp8,fp8,0,0.23155839443206788
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,8,4,128,1,float16,float16,0,0.2849423885345459
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,8,4,128,1,float16,fp8,0,0.23169119358062745
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,8,4,128,1,fp8,fp8,0,0.23382558822631835
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,8,1,128,1,float16,fp8,0,1.6250223159790038
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,8,1,128,1,float16,float16,0,1.8856992721557617
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,8,1,128,1,fp8,fp8,0,1.624033546447754
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,8,2,128,1,float16,fp8,0,1.6236783981323242
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,8,2,128,1,float16,float16,0,2.079147148132324
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,8,2,128,1,fp8,fp8,0,1.6209983825683594
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,8,8,128,1,float16,fp8,0,0.849027156829834
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,8,4,128,1,float16,float16,0,2.387256050109863
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,8,8,128,1,fp8,fp8,0,0.8472047805786133
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,8,1,128,1,float16,float16,0,1.0338224411010741
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,8,4,128,1,fp8,fp8,0,1.6250240325927734
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,8,1,128,1,float16,fp8,0,0.8454959869384766
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,8,8,128,1,float16,float16,0,1.6083887100219727
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,8,4,128,1,float16,fp8,0,1.6341024398803712
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,8,1,128,1,fp8,fp8,0,0.8471936225891114
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,8,2,128,1,float16,fp8,0,0.8458080291748047
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,8,2,128,1,fp8,fp8,0,0.9084639549255371
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,8,2,128,1,float16,float16,0,1.0680303573608398
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,8,4,128,1,float16,fp8,0,0.8470911979675293
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,8,8,128,1,float16,fp8,0,0.4600128173828125
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,8,4,128,1,fp8,fp8,0,0.877126407623291
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,8,4,128,1,float16,float16,0,1.221241569519043
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,8,8,128,1,float16,float16,0,0.8000176429748536
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,8,8,128,1,fp8,fp8,0,0.45882081985473633
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,8,1,128,1,float16,float16,0,0.5330080032348633
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,8,1,128,1,float16,fp8,0,0.4670623779296875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,8,2,128,1,fp8,fp8,0,0.48787198066711424
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,8,1,128,1,fp8,fp8,0,0.4572080135345459
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,8,2,128,1,float16,float16,0,0.5597104072570801
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,8,2,128,1,float16,fp8,0,0.46104159355163576
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,8,4,128,1,float16,fp8,0,0.4584144115447998
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,8,4,128,1,float16,float16,0,0.6390399932861328
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,8,4,128,1,fp8,fp8,0,0.4636335849761963
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,8,8,128,1,float16,float16,0,0.4438943862915039
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,8,8,128,1,float16,fp8,0,0.2719727993011475
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,8,8,128,1,fp8,fp8,0,0.2658655881881714
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,8,2,128,1,float16,fp8,0,0.2650608062744141
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,8,1,128,1,float16,float16,0,0.2878688097000122
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,8,4,128,1,float16,float16,0,0.3588736057281494
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,8,1,128,1,float16,fp8,0,0.26282880306243894
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,8,1,128,1,fp8,fp8,0,0.2624687910079956
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,8,2,128,1,float16,float16,0,0.3127120018005371
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,8,2,128,1,fp8,fp8,0,0.265393590927124
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,8,4,128,1,float16,fp8,0,0.2636143922805786
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,8,4,128,1,fp8,fp8,0,0.26392641067504885
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,8,8,128,1,float16,fp8,0,0.16942720413208007
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,8,8,128,1,float16,float16,0,0.2542623996734619
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,8,2,128,1,float16,fp8,0,0.1685696005821228
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,8,8,128,1,fp8,fp8,0,0.16836479902267457
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,8,1,128,1,float16,float16,0,0.18392640352249146
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,8,1,128,1,float16,fp8,0,0.16746560335159302
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,8,1,128,1,fp8,fp8,0,0.17003680467605592
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,8,2,128,1,float16,float16,0,0.19138879776000978
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,8,2,128,1,fp8,fp8,0,0.16836639642715454
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,8,4,128,1,float16,float16,0,0.20471839904785155
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,8,4,128,1,float16,fp8,0,0.168779194355011
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,8,4,128,1,fp8,fp8,0,0.1682096004486084
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,8,1,128,1,float16,float16,0,1.1349616050720215
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,8,1,128,1,float16,fp8,0,0.9829423904418946
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,8,1,128,1,fp8,fp8,0,0.9849648475646973
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,8,2,128,1,float16,fp8,0,0.9845536231994629
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,8,2,128,1,float16,float16,0,1.2558320045471192
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,8,2,128,1,fp8,fp8,0,0.9827407836914063
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,8,4,128,1,float16,fp8,0,0.9843040466308594
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,8,8,128,1,float16,fp8,0,0.5216335773468017
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,8,4,128,1,fp8,fp8,0,0.9827103614807129
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,8,8,128,1,fp8,fp8,0,0.5204080104827881
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,8,8,128,1,float16,float16,0,1.0128848075866699
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,8,1,128,1,float16,fp8,0,0.5188223838806152
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,8,1,128,1,float16,float16,0,0.5976384162902832
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,8,4,128,1,float16,float16,0,1.5019344329833983
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,8,1,128,1,fp8,fp8,0,0.5184751987457276
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,8,2,128,1,float16,fp8,0,0.5178880214691162
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,8,2,128,1,float16,float16,0,0.6520720005035401
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,8,2,128,1,fp8,fp8,0,0.5191071987152099
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,8,4,128,1,float16,fp8,0,0.5192975997924805
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,8,8,128,1,fp8,fp8,0,0.28828001022338867
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,8,4,128,1,float16,float16,0,0.770908784866333
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,8,4,128,1,fp8,fp8,0,0.5205503940582276
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,8,8,128,1,float16,fp8,0,0.2880703926086426
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,8,8,128,1,float16,float16,0,0.5620255947113038
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,8,1,128,1,float16,float16,0,0.32681760787963865
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,8,1,128,1,float16,fp8,0,0.2858992099761963
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,8,1,128,1,fp8,fp8,0,0.28613920211791993
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,8,2,128,1,float16,float16,0,0.35494558811187743
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,8,2,128,1,float16,fp8,0,0.2863856077194214
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,8,2,128,1,fp8,fp8,0,0.28542399406433105
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,8,4,128,1,float16,float16,0,0.4172800064086914
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,8,4,128,1,float16,fp8,0,0.2893471956253052
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,8,4,128,1,fp8,fp8,0,0.28825280666351316
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,8,8,128,1,float16,float16,0,0.2974560022354126
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,8,8,128,1,float16,fp8,0,0.16928160190582275
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,8,8,128,1,fp8,fp8,0,0.17021280527114868
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,8,1,128,1,float16,float16,0,0.18528480529785157
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,8,1,128,1,float16,fp8,0,0.16966240406036376
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,8,1,128,1,fp8,fp8,0,0.1710736036300659
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,8,2,128,1,float16,float16,0,0.19593600034713746
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,8,2,128,1,float16,fp8,0,0.16889760494232178
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,8,2,128,1,fp8,fp8,0,0.1690719962120056
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,8,4,128,1,float16,float16,0,0.23292961120605468
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,8,4,128,1,float16,fp8,0,0.16898720264434813
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,8,4,128,1,fp8,fp8,0,0.1691856026649475
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,8,8,128,1,float16,float16,0,0.1673375964164734
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,8,8,128,1,float16,fp8,0,0.11079200506210327
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,8,8,128,1,fp8,fp8,0,0.11088639497756958
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,8,1,128,1,float16,float16,0,0.12648160457611085
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,8,2,128,1,fp8,fp8,0,0.1110576033592224
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,8,1,128,1,float16,fp8,0,0.11051360368728638
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,8,1,128,1,fp8,fp8,0,0.11120320558547973
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,8,2,128,1,float16,float16,0,0.12900320291519166
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,8,2,128,1,float16,fp8,0,0.11135200262069703
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,8,4,128,1,float16,float16,0,0.1410367965698242
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,8,4,128,1,float16,fp8,0,0.11105920076370239
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,8,4,128,1,fp8,fp8,0,0.1107375979423523
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,8,1,128,1,float16,float16,0,1.0999664306640624
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,8,1,128,1,float16,fp8,0,0.9600288391113281
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,8,1,128,1,fp8,fp8,0,0.9579680442810059
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,8,2,128,1,float16,fp8,0,0.9591232299804687
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,8,2,128,1,float16,float16,0,1.2653152465820312
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,8,2,128,1,fp8,fp8,0,0.959870433807373
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,8,4,128,1,float16,fp8,0,0.9652768135070801
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,8,8,128,1,float16,fp8,0,0.501313591003418
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,8,8,128,1,fp8,fp8,0,0.5180992126464844
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,8,4,128,1,fp8,fp8,0,0.9601327896118164
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,8,4,128,1,float16,float16,0,1.5953311920166016
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,8,1,128,1,float16,float16,0,0.5706575870513916
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,8,8,128,1,float16,float16,0,1.1358736038208008
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,8,1,128,1,float16,fp8,0,0.49988479614257814
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,8,1,128,1,fp8,fp8,0,0.4991744041442871
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,8,2,128,1,float16,float16,0,0.6566720008850098
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,8,2,128,1,float16,fp8,0,0.5001728057861328
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,8,2,128,1,fp8,fp8,0,0.4998608112335205
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,8,4,128,1,float16,fp8,0,0.4998432159423828
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,8,4,128,1,float16,float16,0,0.8139200210571289
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,8,4,128,1,fp8,fp8,0,0.4996975898742676
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,8,8,128,1,float16,fp8,0,0.2713248014450073
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,8,8,128,1,float16,float16,0,0.5883999824523926
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,8,8,128,1,fp8,fp8,0,0.2746464014053345
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,8,1,128,1,float16,float16,0,0.30726239681243894
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,8,1,128,1,float16,fp8,0,0.27037599086761477
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,8,1,128,1,fp8,fp8,0,0.2717024087905884
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,8,2,128,1,float16,float16,0,0.3474816083908081
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,8,2,128,1,float16,fp8,0,0.26914401054382325
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,8,2,128,1,fp8,fp8,0,0.2698767900466919
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,8,4,128,1,float16,fp8,0,0.27255198955535886
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,8,4,128,1,fp8,fp8,0,0.2708031892776489
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,8,8,128,1,float16,fp8,0,0.15656319856643677
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,8,1,128,1,fp8,fp8,0,0.15669920444488525
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,8,8,128,1,float16,float16,0,0.3195120096206665
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,8,8,128,1,fp8,fp8,0,0.15693919658660888
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,8,4,128,1,float16,float16,0,0.42711520195007324
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,8,1,128,1,float16,float16,0,0.17052479982376098
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,8,1,128,1,float16,fp8,0,0.1545632004737854
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,8,2,128,1,float16,float16,0,0.19526560306549073
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,8,8,128,1,float16,float16,0,0.18030879497528077
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,8,2,128,1,float16,fp8,0,0.15494240522384645
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,8,2,128,1,fp8,fp8,0,0.15597280263900756
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,8,4,128,1,float16,float16,0,0.23886559009552003
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,8,4,128,1,float16,fp8,0,0.15546239614486695
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,8,4,128,1,fp8,fp8,0,0.1545024037361145
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,8,8,128,1,float16,fp8,0,0.09865120053291321
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,8,8,128,1,fp8,fp8,0,0.09866880178451538
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,8,1,128,1,float16,float16,0,0.11203999519348144
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,8,1,128,1,float16,fp8,0,0.09826239943504333
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,8,1,128,1,fp8,fp8,0,0.09970399737358093
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,8,4,128,1,fp8,fp8,0,0.09865279793739319
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,8,2,128,1,float16,float16,0,0.11836960315704345
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,8,2,128,1,float16,fp8,0,0.09889600276947022
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,8,2,128,1,fp8,fp8,0,0.09797279834747315
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,8,4,128,1,float16,float16,0,0.13249759674072265
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,8,4,128,1,float16,fp8,0,0.09904159903526306
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,8,8,128,1,float16,float16,0,0.09598079919815064
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,8,8,128,1,float16,fp8,0,0.06244000196456909
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,8,2,128,1,float16,fp8,0,0.0625104010105133
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,8,8,128,1,fp8,fp8,0,0.06204959750175476
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,8,1,128,1,float16,float16,0,0.07229440212249756
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,8,1,128,1,float16,fp8,0,0.062324798107147215
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,8,1,128,1,fp8,fp8,0,0.0616815984249115
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,8,2,128,1,float16,float16,0,0.0759168028831482
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,8,2,128,1,fp8,fp8,0,0.06167680025100708
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,8,4,128,1,float16,float16,0,0.08280799984931946
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,8,4,128,1,float16,fp8,0,0.062483197450637816
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,8,1,128,1,float16,fp8,0,0.601420783996582
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,8,4,128,1,fp8,fp8,0,0.06177279949188232
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,8,1,128,1,float16,float16,0,0.6865327835083008
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,8,1,128,1,fp8,fp8,0,0.5999648094177246
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,8,2,128,1,float16,fp8,0,0.6028240203857422
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,8,2,128,1,float16,float16,0,0.8074416160583496
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,8,2,128,1,fp8,fp8,0,0.6016751766204834
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,8,8,128,1,float16,fp8,0,0.3200176000595093
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,8,4,128,1,float16,fp8,0,0.6007936000823975
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,8,8,128,1,fp8,fp8,0,0.3473583936691284
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,8,4,128,1,fp8,fp8,0,0.6010799884796143
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,8,4,128,1,float16,float16,0,1.054297637939453
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,8,1,128,1,float16,float16,0,0.3616895914077759
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,8,8,128,1,float16,float16,0,0.7823440074920655
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,8,1,128,1,float16,fp8,0,0.3176975965499878
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,8,1,128,1,fp8,fp8,0,0.3166591882705688
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,8,2,128,1,float16,float16,0,0.432041597366333
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,8,4,128,1,float16,fp8,0,0.3173487901687622
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,8,2,128,1,float16,fp8,0,0.31676321029663085
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,8,2,128,1,fp8,fp8,0,0.31605119705200196
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,8,8,128,1,float16,float16,0,0.4100319862365723
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,8,4,128,1,float16,float16,0,0.542855978012085
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,8,4,128,1,fp8,fp8,0,0.31889441013336184
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,8,8,128,1,float16,fp8,0,0.18018879890441894
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,8,8,128,1,fp8,fp8,0,0.17660319805145264
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,8,1,128,1,float16,float16,0,0.20316801071166993
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,8,1,128,1,float16,fp8,0,0.1738319993019104
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,8,1,128,1,fp8,fp8,0,0.17418240308761596
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,8,2,128,1,float16,float16,0,0.23280320167541504
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,8,2,128,1,float16,fp8,0,0.17635680437088014
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,8,2,128,1,fp8,fp8,0,0.17638880014419556
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,8,4,128,1,float16,float16,0,0.29104321002960204
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,8,4,128,1,float16,fp8,0,0.1751360058784485
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,8,4,128,1,fp8,fp8,0,0.1752992033958435
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,8,8,128,1,float16,float16,0,0.22644319534301757
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,8,8,128,1,float16,fp8,0,0.10437920093536376
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,8,8,128,1,fp8,fp8,0,0.10513919591903687
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,8,1,128,1,float16,float16,0,0.11740000247955322
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,8,1,128,1,float16,fp8,0,0.10315519571304321
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,8,4,128,1,float16,fp8,0,0.10399520397186279
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,8,1,128,1,fp8,fp8,0,0.10325119495391846
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,8,2,128,1,float16,float16,0,0.1276095986366272
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,8,2,128,1,float16,fp8,0,0.10304800271987916
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,8,2,128,1,fp8,fp8,0,0.10354399681091309
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,8,4,128,1,float16,float16,0,0.16337120532989502
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,8,4,128,1,fp8,fp8,0,0.10344640016555787
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,8,8,128,1,float16,float16,0,0.11941280364990234
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,8,2,128,1,float16,float16,0,0.08525919914245605
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,8,8,128,1,float16,fp8,0,0.06796320080757141
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,8,2,128,1,float16,fp8,0,0.06757280230522156
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,8,8,128,1,fp8,fp8,0,0.06775040030479432
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,8,1,128,1,float16,float16,0,0.07973279953002929
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,8,1,128,1,float16,fp8,0,0.06725119948387145
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,8,1,128,1,fp8,fp8,0,0.06782879829406738
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,8,2,128,1,fp8,fp8,0,0.06684160232543945
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,8,4,128,1,float16,float16,0,0.09505919814109802
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,8,4,128,1,float16,fp8,0,0.06781920194625854
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,8,4,128,1,fp8,fp8,0,0.06757919788360596
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,8,8,128,1,float16,float16,0,0.07701600193977357
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,8,2,128,1,float16,float16,0,0.06167680025100708
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,8,8,128,1,float16,fp8,0,0.049323201179504395
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,8,8,128,1,fp8,fp8,0,0.049184000492095946
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,8,1,128,1,float16,float16,0,0.05801759958267212
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,8,1,128,1,float16,fp8,0,0.049239999055862425
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,8,1,128,1,fp8,fp8,0,0.049235200881958006
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,8,2,128,1,float16,fp8,0,0.04921280145645142
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,8,4,128,1,float16,float16,0,0.0663648009300232
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,8,2,128,1,fp8,fp8,0,0.049297600984573364
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,8,4,128,1,float16,fp8,0,0.04935680031776428
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,8,1,128,1,float16,fp8,0,0.6207248210906983
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,8,4,128,1,fp8,fp8,0,0.04940800070762634
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,8,1,128,1,float16,float16,0,0.7095168113708497
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,8,1,128,1,fp8,fp8,0,0.6213647842407226
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,8,2,128,1,float16,fp8,0,0.622430419921875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,8,2,128,1,float16,float16,0,0.8712224006652832
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,8,2,128,1,fp8,fp8,0,0.6208672046661377
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,8,4,128,1,float16,fp8,0,0.6208159923553467
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,8,8,128,1,float16,fp8,0,0.32390079498291013
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,8,4,128,1,fp8,fp8,0,0.6213744163513184
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,8,8,128,1,fp8,fp8,0,0.3256943941116333
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,8,1,128,1,fp8,fp8,0,0.3223695993423462
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,8,4,128,1,float16,float16,0,1.199129581451416
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,8,1,128,1,float16,float16,0,0.36900959014892576
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,8,8,128,1,float16,float16,0,0.9324175834655761
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,8,1,128,1,float16,fp8,0,0.32226560115814207
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,8,2,128,1,float16,float16,0,0.44921278953552246
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,8,2,128,1,float16,fp8,0,0.3224208116531372
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,8,2,128,1,fp8,fp8,0,0.3247519969940186
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,8,4,128,1,float16,fp8,0,0.323851203918457
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,8,4,128,1,fp8,fp8,0,0.32325439453125
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,8,4,128,1,float16,float16,0,0.610319995880127
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,8,8,128,1,float16,fp8,0,0.17562079429626465
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,8,8,128,1,float16,float16,0,0.48158559799194334
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,8,2,128,1,float16,float16,0,0.24248960018157958
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,8,8,128,1,fp8,fp8,0,0.17570719718933106
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,8,1,128,1,float16,float16,0,0.20314559936523438
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,8,4,128,1,float16,fp8,0,0.17509759664535524
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,8,1,128,1,float16,fp8,0,0.17530879974365235
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,8,1,128,1,fp8,fp8,0,0.17411680221557618
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,8,2,128,1,float16,fp8,0,0.17471359968185424
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,8,2,128,1,fp8,fp8,0,0.1744047999382019
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,8,8,128,1,fp8,fp8,0,0.10123519897460938
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,8,4,128,1,float16,float16,0,0.3220463991165161
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,8,4,128,1,fp8,fp8,0,0.17467199563980101
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,8,8,128,1,float16,float16,0,0.2589600086212158
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,8,8,128,1,float16,fp8,0,0.10220479965209961
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,8,1,128,1,float16,float16,0,0.1150480031967163
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,8,1,128,1,float16,fp8,0,0.09878559708595276
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,8,4,128,1,float16,float16,0,0.18135839700698853
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,8,1,128,1,fp8,fp8,0,0.09960319995880126
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,8,2,128,1,float16,float16,0,0.13673919439315796
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,8,2,128,1,float16,fp8,0,0.09924160242080689
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,8,2,128,1,fp8,fp8,0,0.09921600222587586
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,8,4,128,1,float16,fp8,0,0.0990831971168518
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,8,4,128,1,fp8,fp8,0,0.09928799867630005
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,8,8,128,1,float16,float16,0,0.14389599561691285
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,8,8,128,1,float16,fp8,0,0.061715197563171384
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,8,8,128,1,fp8,fp8,0,0.061647999286651614
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,8,1,128,1,float16,float16,0,0.07417439818382263
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,8,1,128,1,float16,fp8,0,0.06170079708099365
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,8,1,128,1,fp8,fp8,0,0.06152960062026978
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,8,2,128,1,float16,float16,0,0.08083040118217469
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,8,2,128,1,float16,fp8,0,0.06174399852752686
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,8,2,128,1,fp8,fp8,0,0.061710399389266965
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,8,4,128,1,float16,float16,0,0.09612320065498352
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,8,4,128,1,float16,fp8,0,0.0616815984249115
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,8,4,128,1,fp8,fp8,0,0.0616752028465271
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,8,1,128,1,fp8,fp8,0,0.03914079964160919
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,8,8,128,1,float16,float16,0,0.0720624029636383
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,8,8,128,1,float16,fp8,0,0.03908160030841827
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,8,8,128,1,fp8,fp8,0,0.0391184002161026
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,8,1,128,1,float16,float16,0,0.04739840030670166
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,8,1,128,1,float16,fp8,0,0.03912799954414368
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,8,2,128,1,float16,fp8,0,0.03915359973907471
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,8,2,128,1,float16,float16,0,0.05280159711837769
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,8,2,128,1,fp8,fp8,0,0.03907999992370605
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,8,4,128,1,float16,float16,0,0.05823519825935364
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,8,4,128,1,float16,fp8,0,0.03933759927749634
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,8,8,128,1,float16,float16,0,0.05568000078201294
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,8,4,128,1,fp8,fp8,0,0.03922080099582672
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,8,8,128,1,float16,fp8,0,0.03526079952716828
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,8,8,128,1,fp8,fp8,0,0.03516640067100525
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,8,1,128,1,float16,float16,0,0.045465600490570066
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,8,1,128,1,float16,fp8,0,0.03501920104026794
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,8,1,128,1,fp8,fp8,0,0.03508960008621216
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,8,2,128,1,float16,float16,0,0.045444801449775696
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,8,2,128,1,float16,fp8,0,0.03516319990158081
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,8,2,128,1,fp8,fp8,0,0.035094401240348815
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,8,1,128,1,float16,float16,0,0.46366081237792967
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,8,4,128,1,float16,float16,0,0.0496288001537323
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,8,4,128,1,float16,fp8,0,0.03505280017852783
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,8,4,128,1,fp8,fp8,0,0.03529599905014038
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,8,1,128,1,float16,fp8,0,0.40661120414733887
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,8,1,128,1,fp8,fp8,0,0.4088304042816162
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,8,2,128,1,float16,float16,0,0.5863759994506836
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,8,2,128,1,fp8,fp8,0,0.40688161849975585
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,8,2,128,1,float16,fp8,0,0.40728158950805665
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,8,4,128,1,float16,fp8,0,0.4079023838043213
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,8,4,128,1,fp8,fp8,0,0.4070256233215332
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,8,8,128,1,float16,fp8,0,0.21705920696258546
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,8,4,128,1,float16,float16,0,0.8302607536315918
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,8,8,128,1,fp8,fp8,0,0.2158224105834961
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,8,8,128,1,float16,float16,0,0.6677328109741211
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,8,1,128,1,float16,float16,0,0.2489840030670166
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,8,1,128,1,float16,fp8,0,0.2137120008468628
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,8,1,128,1,fp8,fp8,0,0.21403679847717286
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,8,2,128,1,float16,float16,0,0.30859999656677245
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,8,2,128,1,float16,fp8,0,0.2157088041305542
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,8,8,128,1,float16,fp8,0,0.11949280500411988
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,8,2,128,1,fp8,fp8,0,0.21452479362487792
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,8,4,128,1,float16,fp8,0,0.21517760753631593
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,8,4,128,1,float16,float16,0,0.42820000648498535
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,8,4,128,1,fp8,fp8,0,0.21465919017791749
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,8,2,128,1,float16,float16,0,0.17226239442825317
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,8,8,128,1,float16,float16,0,0.34848799705505373
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,8,8,128,1,fp8,fp8,0,0.11935199499130249
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,8,1,128,1,float16,fp8,0,0.11733440160751343
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,8,1,128,1,float16,float16,0,0.1457856059074402
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,8,4,128,1,fp8,fp8,0,0.1191823959350586
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,8,1,128,1,fp8,fp8,0,0.11726720333099365
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,8,2,128,1,float16,fp8,0,0.11748319864273071
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,8,2,128,1,fp8,fp8,0,0.11753920316696168
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,8,4,128,1,float16,float16,0,0.23138880729675293
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,8,4,128,1,float16,fp8,0,0.11867680549621581
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,8,8,128,1,float16,float16,0,0.19041119813919066
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,8,8,128,1,float16,fp8,0,0.06986879706382751
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,8,8,128,1,fp8,fp8,0,0.06956639885902405
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,8,1,128,1,float16,float16,0,0.08156639933586121
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,8,1,128,1,float16,fp8,0,0.06832479834556579
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,8,1,128,1,fp8,fp8,0,0.06846240162849426
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,8,2,128,1,float16,float16,0,0.09270079731941223
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,8,2,128,1,float16,fp8,0,0.06899039745330811
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,8,2,128,1,fp8,fp8,0,0.06842880249023438
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,8,4,128,1,float16,float16,0,0.12588000297546387
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,8,4,128,1,float16,fp8,0,0.06919839978218079
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,8,4,128,1,fp8,fp8,0,0.06916319727897643
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,8,8,128,1,float16,float16,0,0.09461920261383057
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,8,8,128,1,float16,fp8,0,0.04423680007457733
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,8,8,128,1,fp8,fp8,0,0.043582400679588316
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,8,1,128,1,float16,float16,0,0.05615839958190918
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,8,1,128,1,float16,fp8,0,0.04335840046405792
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,8,1,128,1,fp8,fp8,0,0.043479999899864195
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,8,2,128,1,float16,float16,0,0.061590397357940675
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,8,2,128,1,float16,fp8,0,0.04386720061302185
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,8,2,128,1,fp8,fp8,0,0.04340640008449555
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,8,4,128,1,float16,float16,0,0.07127040028572082
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,8,4,128,1,float16,fp8,0,0.04443199932575226
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,8,4,128,1,fp8,fp8,0,0.04355359971523285
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,8,8,128,1,float16,float16,0,0.05802239775657654
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,8,8,128,1,float16,fp8,0,0.03107840120792389
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,8,8,128,1,fp8,fp8,0,0.03094879984855652
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,8,1,128,1,float16,float16,0,0.041119998693466185
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,8,1,128,1,float16,fp8,0,0.031044799089431762
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,8,1,128,1,fp8,fp8,0,0.030937600135803222
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,8,2,128,1,float16,float16,0,0.04518879950046539
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,8,2,128,1,float16,fp8,0,0.03089120090007782
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,8,2,128,1,fp8,fp8,0,0.031001600623130798
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,8,4,128,1,float16,float16,0,0.048979198932647704
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,8,4,128,1,float16,fp8,0,0.03089759945869446
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,8,4,128,1,fp8,fp8,0,0.03108479976654053
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,8,8,128,1,float16,float16,0,0.04754399955272674
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,8,2,128,1,float16,fp8,0,0.028838399052619933
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,8,8,128,1,float16,fp8,0,0.02884640097618103
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,8,8,128,1,fp8,fp8,0,0.028880000114440918
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,8,1,128,1,float16,float16,0,0.0391728013753891
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,8,1,128,1,float16,fp8,0,0.02893120050430298
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,8,1,128,1,fp8,fp8,0,0.02893120050430298
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,8,2,128,1,float16,float16,0,0.03932960033416748
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,8,2,128,1,fp8,fp8,0,0.02895359992980957
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,8,4,128,1,float16,float16,0,0.043163201212882994
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,8,4,128,1,float16,fp8,0,0.02884640097618103
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,8,4,128,1,fp8,fp8,0,0.028859201073646545
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,8,1,128,1,float16,float16,0,0.5165088176727295
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,8,1,128,1,float16,fp8,0,0.45105438232421874
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,8,1,128,1,fp8,fp8,0,0.4501840114593506
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,8,2,128,1,float16,fp8,0,0.4504176139831543
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,8,2,128,1,float16,float16,0,0.6768911838531494
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,8,2,128,1,fp8,fp8,0,0.45109758377075193
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,8,4,128,1,fp8,fp8,0,0.4492640018463135
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,8,4,128,1,float16,fp8,0,0.45218720436096194
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,8,8,128,1,float16,fp8,0,0.23539040088653565
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,8,8,128,1,float16,float16,0,0.830292797088623
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,8,8,128,1,fp8,fp8,0,0.2359071969985962
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,8,4,128,1,float16,float16,0,1.0015055656433105
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,8,1,128,1,float16,float16,0,0.27238879203796384
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,8,1,128,1,fp8,fp8,0,0.2332240104675293
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,8,1,128,1,float16,fp8,0,0.23400959968566895
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,8,2,128,1,float16,float16,0,0.35470559597015383
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,8,2,128,1,float16,fp8,0,0.2340303897857666
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,8,2,128,1,fp8,fp8,0,0.2349423885345459
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,8,8,128,1,fp8,fp8,0,0.1281424045562744
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,8,4,128,1,float16,fp8,0,0.23469440937042235
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,8,4,128,1,float16,float16,0,0.5124144077301025
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,8,4,128,1,fp8,fp8,0,0.23416318893432617
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,8,8,128,1,float16,fp8,0,0.1270799994468689
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,8,8,128,1,float16,float16,0,0.4279088020324707
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,8,1,128,1,float16,float16,0,0.1527951955795288
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,8,1,128,1,float16,fp8,0,0.12583999633789061
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,8,4,128,1,float16,fp8,0,0.12669919729232787
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,8,1,128,1,fp8,fp8,0,0.12648799419403076
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,8,2,128,1,float16,float16,0,0.1916208028793335
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,8,2,128,1,float16,fp8,0,0.12594879865646363
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,8,2,128,1,fp8,fp8,0,0.12577279806137084
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,8,4,128,1,float16,float16,0,0.26939198970794676
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,8,4,128,1,fp8,fp8,0,0.12651679515838624
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,8,8,128,1,float16,fp8,0,0.07225599884986877
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,8,2,128,1,float16,float16,0,0.10701440572738648
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,8,8,128,1,float16,float16,0,0.2272144079208374
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,8,8,128,1,fp8,fp8,0,0.07308480143547058
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,8,1,128,1,float16,float16,0,0.08469280004501342
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,8,1,128,1,float16,fp8,0,0.0702895998954773
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,8,1,128,1,fp8,fp8,0,0.07010239958763123
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,8,2,128,1,float16,fp8,0,0.07027360200881957
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,8,2,128,1,fp8,fp8,0,0.07003520131111145
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,8,4,128,1,float16,float16,0,0.14941920042037965
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,8,4,128,1,float16,fp8,0,0.07036479711532592
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,8,4,128,1,fp8,fp8,0,0.07176640033721923
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,8,8,128,1,float16,float16,0,0.12137440443038941
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,8,8,128,1,float16,fp8,0,0.0432671993970871
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,8,8,128,1,fp8,fp8,0,0.043315199017524716
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,8,1,128,1,float16,float16,0,0.05435360074043274
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,8,1,128,1,float16,fp8,0,0.04332000017166138
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,8,1,128,1,fp8,fp8,0,0.04316000044345856
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,8,2,128,1,float16,float16,0,0.06036800146102905
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,8,2,128,1,float16,fp8,0,0.04337440133094787
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,8,2,128,1,fp8,fp8,0,0.04321120083332062
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,8,4,128,1,float16,float16,0,0.0750544011592865
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,8,4,128,1,float16,fp8,0,0.043196800351142886
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,8,4,128,1,fp8,fp8,0,0.0434688001871109
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,8,8,128,1,float16,float16,0,0.060038399696350095
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,8,8,128,1,float16,fp8,0,0.0267984002828598
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,8,8,128,1,fp8,fp8,0,0.026868799328804018
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,8,1,128,1,float16,float16,0,0.03507040143013
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,8,1,128,1,float16,fp8,0,0.026836800575256347
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,8,1,128,1,fp8,fp8,0,0.027020800113677978
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,8,2,128,1,float16,float16,0,0.04116159975528717
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,8,2,128,1,float16,fp8,0,0.02683840095996857
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,8,2,128,1,fp8,fp8,0,0.026940798759460448
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,8,4,128,1,float16,float16,0,0.04683679938316345
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,8,4,128,1,float16,fp8,0,0.026867198944091796
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,8,4,128,1,fp8,fp8,0,0.0267984002828598
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,8,8,128,1,float16,float16,0,0.045121601223945616
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,8,8,128,1,float16,fp8,0,0.02476319968700409
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,8,8,128,1,fp8,fp8,0,0.02468640059232712
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,8,1,128,1,float16,float16,0,0.033190399408340454
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,8,1,128,1,float16,fp8,0,0.02391040027141571
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,8,1,128,1,fp8,fp8,0,0.02449759989976883
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,8,2,128,1,float16,float16,0,0.03303520083427429
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,8,2,128,1,float16,fp8,0,0.024396799504756927
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,8,2,128,1,fp8,fp8,0,0.02330079972743988
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,8,4,128,1,float16,float16,0,0.03917439877986908
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,8,4,128,1,float16,fp8,0,0.02481600046157837
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,8,4,128,1,fp8,fp8,0,0.024432000517845155
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,8,8,128,1,float16,float16,0,0.03728480041027069
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,8,8,128,1,float16,fp8,0,0.022819200158119203
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,8,8,128,1,fp8,fp8,0,0.022732800245285033
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,8,1,128,1,float16,float16,0,0.03306719958782196
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,8,1,128,1,float16,fp8,0,0.02272319942712784
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,8,1,128,1,fp8,fp8,0,0.022707200050354003
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,8,2,128,1,float16,float16,0,0.03293440043926239
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,8,2,128,1,float16,fp8,0,0.022888000309467315
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,8,2,128,1,fp8,fp8,0,0.022708800435066224
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,8,4,128,1,float16,float16,0,0.03333280086517334
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,8,4,128,1,float16,fp8,0,0.022787199914455415
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,8,4,128,1,fp8,fp8,0,0.02271520048379898
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,8,1,128,1,float16,fp8,0,0.3650719881057739
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,8,1,128,1,float16,float16,0,0.4290656089782715
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,8,1,128,1,fp8,fp8,0,0.36512320041656493
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,8,2,128,1,fp8,fp8,0,0.3657295942306519
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,8,2,128,1,float16,fp8,0,0.3669248104095459
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,8,2,128,1,float16,float16,0,0.5900447845458985
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,8,4,128,1,float16,fp8,0,0.36492319107055665
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,8,4,128,1,fp8,fp8,0,0.36516480445861815
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,8,8,128,1,float16,fp8,0,0.19041919708251953
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,8,8,128,1,fp8,fp8,0,0.19101439714431762
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,8,4,128,1,float16,float16,0,0.9082015991210938
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,8,1,128,1,float16,float16,0,0.2303663969039917
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,8,1,128,1,float16,fp8,0,0.19055360555648804
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,8,8,128,1,float16,float16,0,0.7793647766113281
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,8,1,128,1,fp8,fp8,0,0.1892351984977722
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,8,2,128,1,float16,float16,0,0.3049855947494507
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,8,2,128,1,float16,fp8,0,0.19030719995498657
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,8,2,128,1,fp8,fp8,0,0.19046720266342163
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,8,4,128,1,float16,fp8,0,0.19078079462051392
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,8,1,128,1,float16,float16,0,0.12844959497451783
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,8,4,128,1,float16,float16,0,0.4650896072387695
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,8,4,128,1,fp8,fp8,0,0.1907583951950073
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,8,8,128,1,float16,fp8,0,0.10235999822616577
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,8,8,128,1,float16,float16,0,0.40133280754089357
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,8,8,128,1,fp8,fp8,0,0.10295039415359497
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,8,1,128,1,float16,fp8,0,0.10227359533309936
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,8,1,128,1,fp8,fp8,0,0.10186400413513183
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,8,2,128,1,float16,float16,0,0.1670912027359009
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,8,2,128,1,float16,fp8,0,0.10233119726181031
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,8,2,128,1,fp8,fp8,0,0.10253280401229858
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,8,4,128,1,float16,float16,0,0.24384799003601074
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,8,4,128,1,float16,fp8,0,0.10294239521026612
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,8,4,128,1,fp8,fp8,0,0.10258079767227173
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,8,8,128,1,float16,fp8,0,0.059006398916244505
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,8,8,128,1,float16,float16,0,0.21148641109466554
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,8,8,128,1,fp8,fp8,0,0.05813120007514953
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,8,1,128,1,float16,float16,0,0.07141919732093811
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,8,1,128,1,float16,fp8,0,0.05686079859733582
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,8,1,128,1,fp8,fp8,0,0.05604959726333618
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,8,2,128,1,float16,float16,0,0.0925279974937439
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,8,2,128,1,float16,fp8,0,0.05586879849433899
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,8,2,128,1,fp8,fp8,0,0.057076799869537356
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,8,4,128,1,float16,float16,0,0.13521599769592285
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,8,4,128,1,float16,fp8,0,0.05634080171585083
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,8,4,128,1,fp8,fp8,0,0.05716000199317932
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,8,8,128,1,float16,float16,0,0.11370559930801391
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,8,8,128,1,float16,fp8,0,0.03498879969120026
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,8,2,128,1,fp8,fp8,0,0.03385440111160278
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,8,8,128,1,fp8,fp8,0,0.03484320044517517
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,8,1,128,1,float16,float16,0,0.04443039894104004
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,8,1,128,1,float16,fp8,0,0.034980800747871396
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,8,1,128,1,fp8,fp8,0,0.03326399922370911
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,8,2,128,1,float16,float16,0,0.05144960284233093
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,8,2,128,1,float16,fp8,0,0.03469760119915009
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,8,4,128,1,float16,float16,0,0.0658079981803894
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,8,4,128,1,float16,fp8,0,0.03424960076808929
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,8,4,128,1,fp8,fp8,0,0.03491199910640717
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,8,8,128,1,float16,float16,0,0.0536624014377594
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,8,8,128,1,float16,fp8,0,0.021048000454902648
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,8,8,128,1,fp8,fp8,0,0.02075680047273636
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,8,1,128,1,float16,float16,0,0.02884480059146881
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,8,1,128,1,float16,fp8,0,0.02075839936733246
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,8,1,128,1,fp8,fp8,0,0.02077919989824295
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,8,2,128,1,float16,float16,0,0.0345551997423172
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,8,2,128,1,float16,fp8,0,0.02072319984436035
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,8,2,128,1,fp8,fp8,0,0.02074880003929138
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,8,4,128,1,float16,float16,0,0.040931200981140135
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,8,4,128,1,float16,fp8,0,0.020929600298404693
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,8,4,128,1,fp8,fp8,0,0.02067520022392273
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,8,8,128,1,float16,float16,0,0.03875359892845154
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,8,8,128,1,float16,fp8,0,0.01865279972553253
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,8,8,128,1,fp8,fp8,0,0.01857919991016388
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,8,1,128,1,float16,float16,0,0.0270224004983902
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,8,1,128,1,float16,fp8,0,0.018643200397491455
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,8,1,128,1,fp8,fp8,0,0.01868479996919632
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,8,2,128,1,float16,float16,0,0.02880159914493561
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,8,2,128,1,float16,fp8,0,0.01855839937925339
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,8,2,128,1,fp8,fp8,0,0.018619200587272643
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,8,4,128,1,float16,float16,0,0.0329008013010025
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,8,4,128,1,float16,fp8,0,0.018676799535751343
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,8,4,128,1,fp8,fp8,0,0.01858240067958832
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,8,8,128,1,float16,float16,0,0.030990400910377504
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,8,8,128,1,float16,fp8,0,0.016627199947834015
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,8,2,128,1,float16,fp8,0,0.016574400663375854
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,8,8,128,1,fp8,fp8,0,0.016553600132465363
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,8,1,128,1,float16,float16,0,0.02680639922618866
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,8,1,128,1,float16,fp8,0,0.01653600037097931
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,8,1,128,1,fp8,fp8,0,0.016649599373340606
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,8,2,128,1,float16,float16,0,0.027035200595855714
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,8,2,128,1,fp8,fp8,0,0.01669439971446991
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,8,4,128,1,float16,float16,0,0.026931199431419372
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,8,4,128,1,float16,fp8,0,0.016603200137615202
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,8,4,128,1,fp8,fp8,0,0.01661760061979294
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,8,8,128,1,float16,float16,0,0.027692800760269164
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,8,1,128,1,fp8,fp8,0,0.016697600483894348
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,8,8,128,1,float16,fp8,0,0.016625599563121797
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,8,2,128,1,float16,fp8,0,0.016595199704170227
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,8,8,128,1,fp8,fp8,0,0.01663679927587509
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,8,4,128,1,float16,float16,0,0.02686080038547516
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,8,1,128,1,float16,float16,0,0.026820799708366393
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,8,1,128,1,float16,fp8,0,0.01656319946050644
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,8,2,128,1,float16,float16,0,0.02688960134983063
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,8,2,128,1,fp8,fp8,0,0.016491200029850005
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,8,4,128,1,float16,fp8,0,0.016568000614643096
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,8,4,128,1,fp8,fp8,0,0.0165583997964859
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,8,1,128,1,float16,float16,0,0.1973423957824707
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,8,1,128,1,fp8,fp8,0,0.16258399486541747
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,8,1,128,1,float16,fp8,0,0.1623152017593384
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,8,2,128,1,float16,float16,0,0.2750272035598755
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,8,2,128,1,float16,fp8,0,0.1619984030723572
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,8,8,128,1,float16,fp8,0,0.0864575982093811
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,8,2,128,1,fp8,fp8,0,0.16232479810714723
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,8,4,128,1,fp8,fp8,0,0.16264959573745727
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,8,4,128,1,float16,fp8,0,0.1623152017593384
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,8,4,128,1,float16,float16,0,0.4325247764587402
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,8,8,128,1,float16,float16,0,0.38536159992218016
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,8,8,128,1,fp8,fp8,0,0.08626239895820617
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,8,1,128,1,float16,float16,0,0.11103520393371583
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,8,1,128,1,float16,fp8,0,0.08643199801445008
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,8,1,128,1,fp8,fp8,0,0.08524479866027831
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,8,2,128,1,float16,float16,0,0.14905439615249633
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,8,2,128,1,float16,fp8,0,0.08625760078430175
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,8,2,128,1,fp8,fp8,0,0.0859391987323761
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,8,4,128,1,float16,float16,0,0.22740159034729004
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,8,4,128,1,float16,fp8,0,0.08640639781951905
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,8,4,128,1,fp8,fp8,0,0.0863215982913971
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,8,8,128,1,float16,float16,0,0.2050640106201172
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,8,8,128,1,float16,fp8,0,0.04982720017433166
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,8,8,128,1,fp8,fp8,0,0.05087040066719055
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,8,1,128,1,float16,float16,0,0.059828799962997434
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,8,1,128,1,float16,fp8,0,0.0483599990606308
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,8,1,128,1,fp8,fp8,0,0.04924800097942352
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,8,2,128,1,float16,float16,0,0.08407999873161316
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,8,8,128,1,float16,float16,0,0.10671199560165405
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,8,8,128,1,float16,fp8,0,0.028883200883865357
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,8,2,128,1,float16,fp8,0,0.049270400404930116
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,8,2,128,1,fp8,fp8,0,0.04940159916877747
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,8,4,128,1,float16,float16,0,0.1265920042991638
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,8,4,128,1,float16,fp8,0,0.04927839934825897
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,8,4,128,1,fp8,fp8,0,0.04939360022544861
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,8,8,128,1,fp8,fp8,0,0.02886880040168762
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,8,1,128,1,float16,float16,0,0.03852640092372894
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,8,1,128,1,float16,fp8,0,0.0288239985704422
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,8,1,128,1,fp8,fp8,0,0.028811201453208923
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,8,2,128,1,float16,float16,0,0.043952000141143796
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,8,2,128,1,float16,fp8,0,0.028870400786399842
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,8,2,128,1,fp8,fp8,0,0.02901279926300049
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,8,4,128,1,float16,float16,0,0.060127997398376466
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,8,4,128,1,float16,fp8,0,0.0287663996219635
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,8,4,128,1,fp8,fp8,0,0.028147199749946596
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,8,8,128,1,float16,float16,0,0.05074399709701538
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,8,8,128,1,float16,fp8,0,0.01860480010509491
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,8,8,128,1,fp8,fp8,0,0.018596799671649934
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,8,1,128,1,float16,float16,0,0.026769599318504332
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,8,1,128,1,float16,fp8,0,0.018615999817848207
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,8,4,128,1,fp8,fp8,0,0.018595199286937713
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,8,1,128,1,fp8,fp8,0,0.018680000305175783
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,8,2,128,1,float16,float16,0,0.03097119927406311
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,8,2,128,1,float16,fp8,0,0.01863040030002594
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,8,2,128,1,fp8,fp8,0,0.018695999681949616
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,8,4,128,1,float16,float16,0,0.037064000964164734
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,8,4,128,1,float16,fp8,0,0.018559999763965607
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,8,8,128,1,float16,float16,0,0.035235199332237246
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,8,8,128,1,float16,fp8,0,0.015849600732326507
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,8,8,128,1,fp8,fp8,0,0.014609600603580474
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,8,1,128,1,float16,float16,0,0.02478239983320236
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,8,1,128,1,float16,fp8,0,0.014547200500965118
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,8,4,128,1,fp8,fp8,0,0.014616000652313232
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,8,1,128,1,fp8,fp8,0,0.014585599303245544
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,8,2,128,1,float16,float16,0,0.02491839975118637
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,8,2,128,1,float16,fp8,0,0.014591999351978302
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,8,2,128,1,fp8,fp8,0,0.014574399590492249
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,8,4,128,1,float16,float16,0,0.02935360074043274
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,8,4,128,1,float16,fp8,0,0.014521600306034088
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,8,8,128,1,float16,float16,0,0.028910401463508605
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,8,8,128,1,float16,fp8,0,0.014497600495815277
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,8,8,128,1,fp8,fp8,0,0.01456640064716339
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,8,1,128,1,float16,float16,0,0.024723200500011443
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,8,1,128,1,float16,fp8,0,0.014591999351978302
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,8,1,128,1,fp8,fp8,0,0.014478400349617004
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,8,2,128,1,float16,float16,0,0.022888000309467315
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,8,2,128,1,float16,fp8,0,0.0146479994058609
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,8,2,128,1,fp8,fp8,0,0.014454400539398194
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,8,4,128,1,float16,float16,0,0.023633599281311035
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,8,4,128,1,float16,fp8,0,0.014478400349617004
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,8,4,128,1,fp8,fp8,0,0.014476799964904785
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,8,8,128,1,float16,float16,0,0.024748800694942473
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,8,8,128,1,float16,fp8,0,0.012531200051307678
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,8,8,128,1,fp8,fp8,0,0.013662399351596832
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,8,1,128,1,float16,float16,0,0.02322400063276291
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,8,1,128,1,float16,fp8,0,0.012548799812793731
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,8,1,128,1,fp8,fp8,0,0.012734399735927581
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,8,2,128,1,float16,float16,0,0.024692800641059876
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,8,2,128,1,float16,fp8,0,0.013027200102806091
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,8,2,128,1,fp8,fp8,0,0.012545600533485413
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,8,4,128,1,float16,float16,0,0.024932800233364104
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,8,4,128,1,float16,fp8,0,0.01348160058259964
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,8,4,128,1,fp8,fp8,0,0.012558400630950928
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,8,8,128,1,float16,float16,0,0.02327200025320053
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,8,8,128,1,float16,fp8,0,0.012849600613117218
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,8,8,128,1,fp8,fp8,0,0.012742400169372559
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,8,1,128,1,float16,float16,0,0.022812800109386445
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,8,1,128,1,fp8,fp8,0,0.01255040019750595
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,8,1,128,1,float16,fp8,0,0.01266240030527115
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,8,2,128,1,float16,float16,0,0.021884800493717195
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,8,2,128,1,float16,fp8,0,0.012639999389648438
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,8,1,128,1,float16,float16,0,0.1234287977218628
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,8,2,128,1,fp8,fp8,0,0.0125231996178627
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,8,4,128,1,float16,float16,0,0.022761599719524385
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,8,4,128,1,float16,fp8,0,0.01255040019750595
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,8,4,128,1,fp8,fp8,0,0.012566399574279786
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,8,1,128,1,float16,fp8,0,0.09661440253257751
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,8,2,128,1,float16,float16,0,0.16087039709091186
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,8,4,128,1,float16,fp8,0,0.09661920070648193
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,8,1,128,1,fp8,fp8,0,0.09648159742355347
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,8,2,128,1,float16,fp8,0,0.09680960178375245
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,8,2,128,1,fp8,fp8,0,0.09650880098342896
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,8,4,128,1,float16,float16,0,0.23792319297790526
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,8,4,128,1,fp8,fp8,0,0.09702079892158508
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,8,8,128,1,float16,fp8,0,0.05350080132484436
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,8,8,128,1,float16,float16,0,0.20720961093902587
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,8,8,128,1,fp8,fp8,0,0.0534608006477356
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,8,1,128,1,float16,float16,0,0.06783040165901184
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,8,1,128,1,float16,fp8,0,0.05146719813346863
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,8,1,128,1,fp8,fp8,0,0.05138720273971557
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,8,2,128,1,float16,float16,0,0.09049440026283265
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,8,2,128,1,float16,fp8,0,0.05145919919013977
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,8,2,128,1,fp8,fp8,0,0.051641601324081424
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,8,4,128,1,float16,float16,0,0.1302448034286499
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,8,4,128,1,float16,fp8,0,0.05149440169334411
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,8,4,128,1,fp8,fp8,0,0.051451200246810914
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,8,8,128,1,float16,float16,0,0.1108016014099121
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,8,8,128,1,float16,fp8,0,0.030799999833106995
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,8,8,128,1,fp8,fp8,0,0.030828800797462464
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,8,1,128,1,float16,float16,0,0.0419295996427536
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,8,1,128,1,float16,fp8,0,0.030979201197624207
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,8,1,128,1,fp8,fp8,0,0.030979201197624207
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,8,2,128,1,float16,float16,0,0.047366398572921756
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,8,2,128,1,float16,fp8,0,0.030865600705146788
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,8,2,128,1,fp8,fp8,0,0.031036800146102904
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,8,8,128,1,fp8,fp8,0,0.018783999979496
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,8,4,128,1,float16,float16,0,0.06167680025100708
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,8,4,128,1,float16,fp8,0,0.030934399366378783
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,8,4,128,1,fp8,fp8,0,0.031062400341033934
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,8,8,128,1,float16,float16,0,0.05229439735412598
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,8,8,128,1,float16,fp8,0,0.019075199961662292
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,8,1,128,1,float16,float16,0,0.027102398872375488
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,8,1,128,1,float16,fp8,0,0.01868640035390854
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,8,1,128,1,fp8,fp8,0,0.018697600066661834
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,8,2,128,1,float16,float16,0,0.032913601398468016
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,8,2,128,1,float16,fp8,0,0.018827199935913086
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,8,2,128,1,fp8,fp8,0,0.018729600310325622
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,8,4,128,1,float16,float16,0,0.03907679915428162
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,8,1,128,1,float16,fp8,0,0.012641599774360657
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,8,4,128,1,float16,fp8,0,0.01868479996919632
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,8,4,128,1,fp8,fp8,0,0.018585599958896637
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,8,8,128,1,float16,float16,0,0.033236798644065854
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,8,8,128,1,float16,fp8,0,0.01252640038728714
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,8,8,128,1,fp8,fp8,0,0.012651200592517852
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,8,1,128,1,float16,float16,0,0.022838400304317476
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,8,1,128,1,fp8,fp8,0,0.012559999525547028
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,8,2,128,1,float16,float16,0,0.022694399952888487
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,8,2,128,1,float16,fp8,0,0.012511999905109405
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,8,8,128,1,fp8,fp8,0,0.010667199641466141
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,8,2,128,1,fp8,fp8,0,0.012636800110340119
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,8,4,128,1,float16,float16,0,0.0270224004983902
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,8,4,128,1,float16,fp8,0,0.012542399764060973
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,8,4,128,1,fp8,fp8,0,0.012563200294971466
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,8,2,128,1,float16,fp8,0,0.010985600203275681
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,8,8,128,1,float16,float16,0,0.026491200923919676
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,8,2,128,1,fp8,fp8,0,0.012460800260305405
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,8,8,128,1,float16,fp8,0,0.01063840016722679
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,8,1,128,1,float16,float16,0,0.020688000321388244
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,8,1,128,1,float16,fp8,0,0.01048479974269867
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,8,1,128,1,fp8,fp8,0,0.012428800016641617
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,8,2,128,1,float16,float16,0,0.022678400576114654
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,8,4,128,1,float16,float16,0,0.021840000152587892
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,8,4,128,1,float16,fp8,0,0.010576000064611435
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,8,4,128,1,fp8,fp8,0,0.012462399899959564
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,8,1,128,1,fp8,fp8,0,0.010377600044012069
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,8,8,128,1,float16,float16,0,0.02134400010108948
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,8,8,128,1,float16,fp8,0,0.010513599961996078
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,8,8,128,1,fp8,fp8,0,0.010593599826097488
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,8,4,128,1,float16,float16,0,0.02072640061378479
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,8,1,128,1,float16,float16,0,0.020665599405765532
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,8,1,128,1,float16,fp8,0,0.010462400317192078
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,8,2,128,1,float16,float16,0,0.02131039947271347
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,8,2,128,1,float16,fp8,0,0.0104032002389431
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,8,2,128,1,fp8,fp8,0,0.010460799932479859
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,8,4,128,1,float16,fp8,0,0.010444799810647965
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,8,4,128,1,fp8,fp8,0,0.010598400235176086
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,8,8,128,1,float16,float16,0,0.020721599459648132
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,8,8,128,1,float16,fp8,0,0.010633599758148194
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,8,8,128,1,fp8,fp8,0,0.010424000024795533
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,8,1,128,1,float16,float16,0,0.018811200559139252
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,8,1,128,1,float16,fp8,0,0.010454399883747101
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,8,1,128,1,fp8,fp8,0,0.010385599732398988
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,8,2,128,1,float16,float16,0,0.018694399297237395
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,8,2,128,1,float16,fp8,0,0.01048159971833229
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,8,2,128,1,fp8,fp8,0,0.01051200032234192
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,8,4,128,1,float16,float16,0,0.020745599269866945
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,8,4,128,1,float16,fp8,0,0.010478399693965912
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,8,4,128,1,fp8,fp8,0,0.010443200170993806
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,8,8,128,1,float16,float16,0,0.019254399836063384
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,8,8,128,1,float16,fp8,0,0.010419200360774993
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,8,8,128,1,fp8,fp8,0,0.010476800054311753
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,8,1,128,1,float16,float16,0,0.01876160055398941
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,8,1,128,1,float16,fp8,0,0.010380800068378448
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,8,1,128,1,fp8,fp8,0,0.010398399829864503
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,8,2,128,1,float16,float16,0,0.018739199638366698
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,8,2,128,1,float16,fp8,0,0.010406400263309478
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,8,2,128,1,fp8,fp8,0,0.010422399640083313
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,8,4,128,1,float16,float16,0,0.019075199961662292
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,8,4,128,1,float16,fp8,0,0.010606399923563003
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,8,4,128,1,fp8,fp8,0,0.010438399761915207
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,8,1,128,1,float16,float16,0,0.09190080165863038
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,8,1,128,1,float16,fp8,0,0.07068319916725159
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,8,1,128,1,fp8,fp8,0,0.07017120122909545
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,8,2,128,1,float16,float16,0,0.112881600856781
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,8,2,128,1,float16,fp8,0,0.07141759991645813
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,8,2,128,1,fp8,fp8,0,0.07031999826431275
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,8,4,128,1,float16,float16,0,0.15202080011367797
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,8,1,128,1,float16,float16,0,0.0523904025554657
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,8,4,128,1,float16,fp8,0,0.07061920166015626
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,8,4,128,1,fp8,fp8,0,0.07055839896202087
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,8,8,128,1,float16,float16,0,0.12243520021438599
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,8,8,128,1,float16,fp8,0,0.03922240138053894
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,8,8,128,1,fp8,fp8,0,0.03932160139083862
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,8,1,128,1,float16,fp8,0,0.03908640146255493
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,8,1,128,1,fp8,fp8,0,0.03927839994430542
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,8,2,128,1,float16,float16,0,0.058166402578353885
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,8,2,128,1,float16,fp8,0,0.03909760117530823
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,8,2,128,1,fp8,fp8,0,0.039155200123786926
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,8,4,128,1,float16,float16,0,0.0743183970451355
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,8,4,128,1,float16,fp8,0,0.03912160098552704
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,8,4,128,1,fp8,fp8,0,0.039182400703430174
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,8,8,128,1,float16,float16,0,0.05963680148124695
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,8,8,128,1,float16,fp8,0,0.024875199794769286
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,8,8,128,1,fp8,fp8,0,0.024748800694942473
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,8,1,128,1,float16,float16,0,0.033139199018478394
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,8,1,128,1,float16,fp8,0,0.024716800451278685
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,8,1,128,1,fp8,fp8,0,0.024715200066566467
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,8,2,128,1,float16,float16,0,0.03902559876441956
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,8,2,128,1,float16,fp8,0,0.024833600223064422
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,8,2,128,1,fp8,fp8,0,0.02478239983320236
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,8,4,128,1,float16,float16,0,0.04514240026473999
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,8,4,128,1,float16,fp8,0,0.02473919987678528
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,8,4,128,1,fp8,fp8,0,0.024743999540805816
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,8,8,128,1,float16,float16,0,0.03706879913806915
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,8,8,128,1,float16,fp8,0,0.016545599699020384
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,8,8,128,1,fp8,fp8,0,0.016596800088882445
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,8,1,128,1,float16,float16,0,0.026812800765037538
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,8,1,128,1,float16,fp8,0,0.015836800634860992
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,8,1,128,1,fp8,fp8,0,0.016415999829769136
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,8,2,128,1,float16,float16,0,0.026774400472640993
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,8,2,128,1,float16,fp8,0,0.01664479970932007
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,8,2,128,1,fp8,fp8,0,0.016547200083732606
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,8,4,128,1,float16,float16,0,0.030958399176597595
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,8,4,128,1,float16,fp8,0,0.01658080071210861
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,8,4,128,1,fp8,fp8,0,0.016257600486278535
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,8,8,128,1,float16,float16,0,0.027076798677444457
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,8,8,128,1,float16,fp8,0,0.010704000294208527
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,8,8,128,1,fp8,fp8,0,0.010684800148010255
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,8,4,128,1,float16,float16,0,0.022742399573326112
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,8,1,128,1,float16,float16,0,0.022782400250434875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,8,1,128,1,float16,fp8,0,0.010920000076293946
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,8,1,128,1,fp8,fp8,0,0.01117440015077591
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,8,8,128,1,float16,fp8,0,0.010427200049161912
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,8,2,128,1,float16,float16,0,0.022852799296379088
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,8,2,128,1,float16,fp8,0,0.010596799850463866
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,8,2,128,1,fp8,fp8,0,0.012027200311422348
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,8,4,128,1,float16,fp8,0,0.010499200224876404
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,8,4,128,1,fp8,fp8,0,0.012129600346088409
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,8,8,128,1,float16,float16,0,0.02157440036535263
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,8,8,128,1,fp8,fp8,0,0.010407999902963639
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,8,1,128,1,float16,float16,0,0.0207056000828743
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,8,1,128,1,float16,fp8,0,0.010411199927330018
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,8,4,128,1,float16,fp8,0,0.010407999902963639
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,8,1,128,1,fp8,fp8,0,0.010387200117111205
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,8,8,128,1,float16,float16,0,0.020636799931526183
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,8,2,128,1,float16,float16,0,0.020707200467586517
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,8,2,128,1,float16,fp8,0,0.010521599650382995
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,8,2,128,1,fp8,fp8,0,0.010424000024795533
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,8,4,128,1,float16,float16,0,0.02073120027780533
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,8,4,128,1,fp8,fp8,0,0.010486400127410889
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,8,8,128,1,float16,fp8,0,0.010361599922180175
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,8,8,128,1,fp8,fp8,0,0.010385599732398988
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,8,1,128,1,float16,float16,0,0.01908639967441559
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,8,1,128,1,float16,fp8,0,0.010468800365924836
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,8,1,128,1,fp8,fp8,0,0.00990239977836609
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,8,2,128,1,float16,float16,0,0.018768000602722167
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,8,2,128,1,float16,fp8,0,0.010467199981212616
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,8,8,128,1,float16,fp8,0,0.009411200135946273
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,8,2,128,1,fp8,fp8,0,0.010363200306892395
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,8,4,128,1,float16,float16,0,0.01964160054922104
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,8,4,128,1,float16,fp8,0,0.010358399897813796
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,8,4,128,1,fp8,fp8,0,0.010478399693965912
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,8,8,128,1,float16,float16,0,0.018603199720382692
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,8,8,128,1,fp8,fp8,0,0.010441599786281586
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,8,1,128,1,float16,float16,0,0.01865600049495697
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,8,1,128,1,float16,fp8,0,0.010267200320959092
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,8,4,128,1,float16,fp8,0,0.010353600233793258
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,8,1,128,1,fp8,fp8,0,0.010288000106811523
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,8,2,128,1,float16,float16,0,0.018702399730682374
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,8,2,128,1,float16,fp8,0,0.010435199737548828
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,8,2,128,1,fp8,fp8,0,0.010411199927330018
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,8,4,128,1,float16,float16,0,0.018799999356269838
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,8,4,128,1,fp8,fp8,0,0.010401599854230881
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,8,8,128,1,float16,float16,0,0.01865759938955307
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,8,8,128,1,float16,fp8,0,0.01037919968366623
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,8,8,128,1,fp8,fp8,0,0.010385599732398988
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,8,1,128,1,float16,fp8,0,0.010419200360774993
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,8,1,128,1,fp8,fp8,0,0.00957920029759407
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,8,1,128,1,float16,float16,0,0.01870400011539459
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,8,2,128,1,float16,float16,0,0.018592000007629395
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,8,2,128,1,float16,fp8,0,0.008790399879217148
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,8,2,128,1,fp8,fp8,0,0.010360000282526016
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,8,4,128,1,float16,float16,0,0.01858399957418442
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,8,4,128,1,float16,fp8,0,0.010387200117111205
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,8,4,128,1,fp8,fp8,0,0.010451199859380722
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,8,1,128,1,float16,fp8,0,0.05840960144996643
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,8,1,128,1,float16,float16,0,0.07497119903564453
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,8,1,128,1,fp8,fp8,0,0.058217602968215945
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,8,2,128,1,float16,float16,0,0.08260480165481568
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,8,2,128,1,float16,fp8,0,0.05890079736709595
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,8,2,128,1,fp8,fp8,0,0.05824480056762695
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,8,4,128,1,float16,float16,0,0.09918239712715149
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,8,4,128,1,float16,fp8,0,0.058766400814056395
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,8,4,128,1,fp8,fp8,0,0.058303999900817874
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,8,8,128,1,float16,float16,0,0.06989759802818299
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,8,8,128,1,float16,fp8,0,0.033025598526000975
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,8,8,128,1,fp8,fp8,0,0.033851200342178346
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,8,1,128,1,float16,float16,0,0.045289599895477296
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,8,1,128,1,float16,fp8,0,0.03323040008544922
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,8,1,128,1,fp8,fp8,0,0.033103999495506284
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,8,2,128,1,float16,float16,0,0.0496832013130188
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,8,2,128,1,float16,fp8,0,0.033374398946762085
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,8,2,128,1,fp8,fp8,0,0.033076798915863036
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,8,4,128,1,float16,float16,0,0.05570240020751953
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,8,4,128,1,float16,fp8,0,0.0336656004190445
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,8,4,128,1,fp8,fp8,0,0.03311040103435516
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,8,8,128,1,float16,float16,0,0.04344480037689209
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,8,8,128,1,float16,fp8,0,0.020817600190639496
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,8,8,128,1,fp8,fp8,0,0.02088160067796707
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,8,1,128,1,float16,float16,0,0.03315039873123169
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,8,1,128,1,float16,fp8,0,0.02072319984436035
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,8,1,128,1,fp8,fp8,0,0.02072640061378479
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,8,2,128,1,float16,float16,0,0.03309440016746521
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,8,2,128,1,float16,fp8,0,0.020742399990558623
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,8,2,128,1,fp8,fp8,0,0.020776000618934632
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,8,4,128,1,float16,float16,0,0.03712800145149231
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,8,4,128,1,float16,fp8,0,0.020771199464797975
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,8,4,128,1,fp8,fp8,0,0.02204640060663223
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,8,8,128,1,float16,float16,0,0.03092319965362549
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,8,2,128,1,float16,fp8,0,0.014579200744628906
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,8,8,128,1,float16,fp8,0,0.014472000300884247
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,8,8,128,1,fp8,fp8,0,0.01451359987258911
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,8,1,128,1,float16,float16,0,0.024774399399757386
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,8,1,128,1,float16,fp8,0,0.014580799639225006
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,8,1,128,1,fp8,fp8,0,0.0149167999625206
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,8,2,128,1,float16,float16,0,0.02483839988708496
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,8,2,128,1,fp8,fp8,0,0.01462399959564209
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,8,4,128,1,float16,float16,0,0.025409600138664244
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,8,4,128,1,float16,fp8,0,0.014484800398349762
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,8,4,128,1,fp8,fp8,0,0.014483200013637542
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,8,8,128,1,float16,float16,0,0.022731199860572815
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,8,8,128,1,float16,fp8,0,0.010420800000429154
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,8,8,128,1,fp8,fp8,0,0.0105103999376297
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,8,1,128,1,float16,float16,0,0.02248159945011139
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,8,1,128,1,float16,fp8,0,0.010492800176143647
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,8,1,128,1,fp8,fp8,0,0.010564800351858139
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,8,2,128,1,float16,float16,0,0.022686399519443512
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,8,2,128,1,float16,fp8,0,0.010499200224876404
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,8,2,128,1,fp8,fp8,0,0.010447999835014344
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,8,4,128,1,float16,float16,0,0.020857599377632142
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,8,4,128,1,float16,fp8,0,0.010470400005578995
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,8,4,128,1,fp8,fp8,0,0.010520000010728836
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,8,8,128,1,float16,float16,0,0.020609599351882935
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,8,8,128,1,float16,fp8,0,0.010478399693965912
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,8,8,128,1,fp8,fp8,0,0.010422399640083313
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,8,1,128,1,float16,float16,0,0.018654400110244752
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,8,1,128,1,float16,fp8,0,0.010496000200510025
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,8,1,128,1,fp8,fp8,0,0.010473600029945374
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,8,2,128,1,float16,float16,0,0.019571200013160706
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,8,2,128,1,float16,fp8,0,0.01037919968366623
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,8,2,128,1,fp8,fp8,0,0.009985599666833878
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,8,4,128,1,float16,float16,0,0.018700799345970152
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,8,4,128,1,float16,fp8,0,0.01048320010304451
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,8,4,128,1,fp8,fp8,0,0.010505600273609162
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,8,8,128,1,float16,float16,0,0.01881600022315979
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,8,8,128,1,float16,fp8,0,0.009996800124645234
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,8,8,128,1,fp8,fp8,0,0.009988799691200256
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,8,1,128,1,float16,float16,0,0.01865279972553253
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,8,1,128,1,float16,fp8,0,0.009172800183296203
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,8,1,128,1,fp8,fp8,0,0.009440000355243682
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,8,2,128,1,float16,float16,0,0.018595199286937713
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,8,2,128,1,float16,fp8,0,0.010496000200510025
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,8,2,128,1,fp8,fp8,0,0.00857279971241951
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,8,4,128,1,float16,float16,0,0.018713599443435668
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,8,1,128,1,float16,float16,0,0.018727999925613404
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,8,4,128,1,float16,fp8,0,0.010179200023412705
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,8,4,128,1,fp8,fp8,0,0.01008159965276718
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,8,8,128,1,float16,float16,0,0.018607999384403228
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,8,8,128,1,float16,fp8,0,0.008852799981832504
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,8,8,128,1,fp8,fp8,0,0.010497599840164185
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,8,1,128,1,float16,fp8,0,0.00846560001373291
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,8,1,128,1,fp8,fp8,0,0.00844319984316826
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,8,2,128,1,float16,float16,0,0.018787199258804323
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,8,2,128,1,float16,fp8,0,0.008582399785518646
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,8,2,128,1,fp8,fp8,0,0.008833599835634231
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,8,4,128,1,float16,float16,0,0.018628799915313722
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,8,4,128,1,float16,fp8,0,0.008580800145864487
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,8,1,128,1,fp8,fp8,0,0.008524800091981888
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,8,4,128,1,fp8,fp8,0,0.008551999926567078
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,8,8,128,1,float16,float16,0,0.018612800538539885
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,8,8,128,1,float16,fp8,0,0.010372799634933472
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,8,8,128,1,fp8,fp8,0,0.008451200276613235
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,8,1,128,1,float16,float16,0,0.016739200055599212
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,8,1,128,1,float16,fp8,0,0.009193599969148637
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,8,2,128,1,float16,float16,0,0.018595199286937713
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,8,2,128,1,float16,fp8,0,0.00936639979481697
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,8,2,128,1,fp8,fp8,0,0.008665599673986436
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,8,4,128,1,float16,fp8,0,0.010411199927330018
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,8,4,128,1,float16,float16,0,0.018727999925613404
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,8,4,128,1,fp8,fp8,0,0.008542399853467941
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,8,1,128,1,float16,float16,0,0.07124480009078979
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,8,1,128,1,float16,fp8,0,0.0539247989654541
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,8,1,128,1,fp8,fp8,0,0.05358240008354187
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,8,2,128,1,float16,float16,0,0.07524639964103699
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,8,2,128,1,fp8,fp8,0,0.05356320142745972
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,8,2,128,1,float16,fp8,0,0.05369600057601929
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,8,4,128,1,float16,float16,0,0.08096160292625428
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,8,4,128,1,float16,fp8,0,0.05380319952964783
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,8,4,128,1,fp8,fp8,0,0.053686398267745974
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,8,8,128,1,float16,float16,0,0.05587040185928345
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,8,8,128,1,float16,fp8,0,0.03108479976654053
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,8,8,128,1,fp8,fp8,0,0.03128640055656433
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,8,1,128,1,float16,float16,0,0.045097601413726804
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,8,1,128,1,float16,fp8,0,0.031016001105308534
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,8,1,128,1,fp8,fp8,0,0.03099200129508972
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,8,2,128,1,float16,float16,0,0.04530560076236725
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,8,2,128,1,float16,fp8,0,0.03147520124912262
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,8,2,128,1,fp8,fp8,0,0.031171199679374696
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,8,4,128,1,float16,float16,0,0.04947519898414612
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,8,4,128,1,float16,fp8,0,0.031035199761390686
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,8,4,128,1,fp8,fp8,0,0.03108159899711609
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,8,8,128,1,float16,float16,0,0.03720479905605316
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,8,8,128,1,float16,fp8,0,0.02080159932374954
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,8,8,128,1,fp8,fp8,0,0.020761600136756896
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,8,1,128,1,float16,float16,0,0.03296160101890564
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,8,1,128,1,float16,fp8,0,0.020630399882793426
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,8,1,128,1,fp8,fp8,0,0.02062080055475235
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,8,2,128,1,float16,float16,0,0.03293119966983795
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,8,2,128,1,float16,fp8,0,0.020609599351882935
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,8,2,128,1,fp8,fp8,0,0.020755200088024138
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,8,4,128,1,float16,float16,0,0.03292160034179688
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,8,4,128,1,float16,fp8,0,0.020721599459648132
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,8,4,128,1,fp8,fp8,0,0.0206496000289917
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,8,8,128,1,float16,float16,0,0.02500160038471222
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,8,8,128,1,float16,fp8,0,0.014504000544548035
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,8,8,128,1,fp8,fp8,0,0.013510400056838989
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,8,1,128,1,float16,float16,0,0.02476159930229187
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,8,1,128,1,float16,fp8,0,0.014057600498199463
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,8,1,128,1,fp8,fp8,0,0.0144896000623703
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,8,2,128,1,float16,float16,0,0.024798400700092316
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,8,2,128,1,float16,fp8,0,0.014620800316333771
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,8,2,128,1,fp8,fp8,0,0.013752000033855438
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,8,4,128,1,float16,float16,0,0.024820800125598907
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,8,4,128,1,float16,fp8,0,0.01356479972600937
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,8,4,128,1,fp8,fp8,0,0.014511999487876893
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,8,8,128,1,float16,float16,0,0.02083519995212555
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,8,2,128,1,float16,fp8,0,0.010518400371074677
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,8,8,128,1,float16,fp8,0,0.010417599976062775
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,8,8,128,1,fp8,fp8,0,0.010529600083827972
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,8,1,128,1,float16,float16,0,0.020636799931526183
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,8,1,128,1,float16,fp8,0,0.010489600151777268
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,8,1,128,1,fp8,fp8,0,0.010492800176143647
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,8,2,128,1,float16,float16,0,0.020729599893093108
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,8,2,128,1,fp8,fp8,0,0.0105103999376297
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,8,4,128,1,float16,float16,0,0.020662400126457214
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,8,4,128,1,float16,fp8,0,0.010446400195360184
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,8,4,128,1,fp8,fp8,0,0.010438399761915207
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,8,8,128,1,float16,float16,0,0.018771199882030486
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,8,8,128,1,float16,fp8,0,0.010507199913263321
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,8,8,128,1,fp8,fp8,0,0.010428799688816071
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,8,1,128,1,float16,float16,0,0.018756799399852753
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,8,1,128,1,float16,fp8,0,0.009505599737167358
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,8,1,128,1,fp8,fp8,0,0.010492800176143647
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,8,2,128,1,float16,float16,0,0.018620799481868743
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,8,2,128,1,float16,fp8,0,0.010391999781131745
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,8,2,128,1,fp8,fp8,0,0.010523200035095215
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,8,8,128,1,fp8,fp8,0,0.0094480000436306
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,8,4,128,1,float16,float16,0,0.018615999817848207
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,8,4,128,1,float16,fp8,0,0.010425599664449692
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,8,4,128,1,fp8,fp8,0,0.010080000013113022
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,8,8,128,1,float16,float16,0,0.01868479996919632
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,8,8,128,1,float16,fp8,0,0.008459199965000153
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,8,1,128,1,float16,fp8,0,0.008743999898433686
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,8,1,128,1,float16,float16,0,0.01881919950246811
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,8,1,128,1,fp8,fp8,0,0.010339199751615524
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,8,2,128,1,float16,float16,0,0.0186831995844841
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,8,2,128,1,float16,fp8,0,0.00857279971241951
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,8,2,128,1,fp8,fp8,0,0.010407999902963639
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,8,4,128,1,float16,float16,0,0.018644799292087556
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,8,4,128,1,float16,fp8,0,0.008479999750852585
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,8,4,128,1,fp8,fp8,0,0.009176000207662582
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,8,8,128,1,float16,float16,0,0.01855839937925339
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,8,8,128,1,float16,fp8,0,0.009995199739933014
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,8,8,128,1,fp8,fp8,0,0.008448000252246856
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,8,4,128,1,float16,float16,0,0.018726399540901183
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,8,1,128,1,float16,float16,0,0.016769599914550782
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,8,1,128,1,float16,fp8,0,0.00923840031027794
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,8,1,128,1,fp8,fp8,0,0.008771199733018875
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,8,8,128,1,float16,fp8,0,0.01008479967713356
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,8,2,128,1,float16,float16,0,0.016847999393939973
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,8,2,128,1,float16,fp8,0,0.008551999926567078
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,8,2,128,1,fp8,fp8,0,0.009470400214195252
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,8,4,128,1,float16,fp8,0,0.008601599931716919
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,8,4,128,1,fp8,fp8,0,0.008428800106048583
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,8,8,128,1,float16,float16,0,0.018671999871730804
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,8,8,128,1,fp8,fp8,0,0.00920960009098053
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,8,1,128,1,float16,float16,0,0.018675200641155243
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,8,1,128,1,float16,fp8,0,0.008659200370311737
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,8,1,128,1,fp8,fp8,0,0.008563199639320373
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,8,2,128,1,float16,float16,0,0.018624000251293182
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,8,2,128,1,float16,fp8,0,0.008399999886751174
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,8,2,128,1,fp8,fp8,0,0.009481599926948548
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,8,4,128,1,float16,float16,0,0.01881600022315979
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,8,4,128,1,float16,fp8,0,0.008479999750852585
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,8,4,128,1,fp8,fp8,0,0.008414400368928909
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,4,1,128,1,float16,fp8,0,1.5995120048522948
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,4,1,128,1,float16,float16,0,1.8634351730346679
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,4,1,128,1,fp8,fp8,0,1.5975600242614747
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,4,4,128,1,float16,fp8,0,0.8637536048889161
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,4,2,128,1,float16,fp8,0,1.5974608421325684
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,4,4,128,1,float16,float16,0,1.2290767669677733
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,4,2,128,1,fp8,fp8,0,1.5991999626159668
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,4,2,128,1,float16,float16,0,2.009998321533203
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,4,4,128,1,fp8,fp8,0,0.8637663841247558
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,4,1,128,1,float16,fp8,0,0.8620575904846192
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,4,1,128,1,float16,float16,0,1.1061599731445313
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,4,1,128,1,fp8,fp8,0,0.8618127822875976
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,4,2,128,1,float16,fp8,0,0.8611503601074219
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,4,2,128,1,float16,float16,0,1.0625599861145019
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,4,2,128,1,fp8,fp8,0,0.8631407737731933
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,4,4,128,1,float16,float16,0,0.7476143836975098
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,4,4,128,1,float16,fp8,0,0.49470720291137693
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,4,1,128,1,float16,fp8,0,0.4940800189971924
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,4,4,128,1,fp8,fp8,0,0.4955632209777832
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,4,1,128,1,float16,float16,0,0.5803152084350586
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,4,1,128,1,fp8,fp8,0,0.49377918243408203
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,4,2,128,1,float16,float16,0,0.6122896194458007
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,4,2,128,1,float16,fp8,0,0.4935647964477539
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,4,2,128,1,fp8,fp8,0,0.499286413192749
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,4,4,128,1,float16,fp8,0,0.31050400733947753
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,4,4,128,1,float16,float16,0,0.40244159698486326
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,4,1,128,1,float16,float16,0,0.3400399923324585
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,4,4,128,1,fp8,fp8,0,0.3096224069595337
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,4,2,128,1,float16,fp8,0,0.3128223896026611
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,4,1,128,1,float16,fp8,0,0.3092832088470459
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,4,1,128,1,fp8,fp8,0,0.3151295900344849
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,4,2,128,1,float16,float16,0,0.34910879135131834
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,4,2,128,1,fp8,fp8,0,0.309934401512146
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,4,1,128,1,float16,float16,0,1.112990379333496
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,4,1,128,1,float16,fp8,0,0.974300765991211
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,4,1,128,1,fp8,fp8,0,0.9677424430847168
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,4,2,128,1,float16,fp8,0,0.9691984176635742
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,4,2,128,1,float16,float16,0,1.232583999633789
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,4,4,128,1,float16,float16,0,0.7861936092376709
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,4,2,128,1,fp8,fp8,0,0.9699839591979981
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,4,4,128,1,float16,fp8,0,0.5346864223480224
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,4,4,128,1,fp8,fp8,0,0.5607135772705079
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,4,1,128,1,float16,float16,0,0.6040800094604493
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,4,1,128,1,float16,fp8,0,0.5321968078613282
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,4,2,128,1,fp8,fp8,0,0.53263840675354
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,4,4,128,1,float16,float16,0,0.4393648147583008
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,4,1,128,1,fp8,fp8,0,0.5321616172790528
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,4,2,128,1,float16,fp8,0,0.5337440013885498
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,4,2,128,1,float16,float16,0,0.674232006072998
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,4,4,128,1,float16,fp8,0,0.31502718925476075
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,4,4,128,1,fp8,fp8,0,0.31245439052581786
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,4,1,128,1,float16,float16,0,0.3369488000869751
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,4,1,128,1,float16,fp8,0,0.3128751993179321
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,4,1,128,1,fp8,fp8,0,0.31088640689849856
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,4,2,128,1,float16,float16,0,0.37164480686187745
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,4,2,128,1,float16,fp8,0,0.32212319374084475
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,4,2,128,1,fp8,fp8,0,0.31396639347076416
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,4,4,128,1,float16,float16,0,0.2521087884902954
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,4,1,128,1,fp8,fp8,0,0.19988160133361815
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,4,4,128,1,float16,fp8,0,0.20111520290374757
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,4,4,128,1,fp8,fp8,0,0.20047199726104736
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,4,1,128,1,float16,float16,0,0.2187488079071045
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,4,1,128,1,float16,fp8,0,0.2009648084640503
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,4,2,128,1,float16,float16,0,0.2307823896408081
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,4,2,128,1,float16,fp8,0,0.20070080757141112
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,4,2,128,1,fp8,fp8,0,0.20059680938720703
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,4,1,128,1,float16,float16,0,0.8113535881042481
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,4,1,128,1,float16,fp8,0,0.7115280151367187
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,4,1,128,1,fp8,fp8,0,0.7112351894378662
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,4,2,128,1,float16,float16,0,0.9161520004272461
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,4,2,128,1,float16,fp8,0,0.7155856132507324
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,4,4,128,1,float16,fp8,0,0.4397183895111084
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,4,4,128,1,fp8,fp8,0,0.3983407974243164
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,4,4,128,1,float16,float16,0,0.5970719814300537
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,4,2,128,1,fp8,fp8,0,0.7121568202972413
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,4,1,128,1,float16,float16,0,0.448038387298584
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,4,2,128,1,float16,float16,0,0.5303071975708008
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,4,1,128,1,float16,fp8,0,0.39662880897521974
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,4,1,128,1,fp8,fp8,0,0.3985919952392578
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,4,2,128,1,float16,fp8,0,0.39631359577178954
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,4,2,128,1,fp8,fp8,0,0.3971872091293335
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,4,4,128,1,float16,fp8,0,0.24337759017944335
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,4,4,128,1,float16,float16,0,0.3462752103805542
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,4,4,128,1,fp8,fp8,0,0.25770719051361085
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,4,1,128,1,float16,float16,0,0.2618607997894287
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,4,1,128,1,float16,fp8,0,0.2436079978942871
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,4,1,128,1,fp8,fp8,0,0.24255518913269042
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,4,2,128,1,float16,float16,0,0.28293280601501464
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,4,2,128,1,float16,fp8,0,0.24182240962982177
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,4,2,128,1,fp8,fp8,0,0.2423248052597046
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,4,4,128,1,float16,float16,0,0.18961119651794434
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,4,4,128,1,float16,fp8,0,0.15198880434036255
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,4,4,128,1,fp8,fp8,0,0.14803199768066405
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,4,1,128,1,float16,float16,0,0.16550719738006592
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,4,1,128,1,float16,fp8,0,0.14787839651107787
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,4,1,128,1,fp8,fp8,0,0.14749759435653687
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,4,2,128,1,float16,float16,0,0.1721503973007202
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,4,2,128,1,float16,fp8,0,0.1491328001022339
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,4,2,128,1,fp8,fp8,0,0.14833600521087648
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,4,1,128,1,float16,float16,0,1.0496864318847656
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,4,1,128,1,float16,fp8,0,0.9184975624084473
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,4,1,128,1,fp8,fp8,0,0.9194160461425781
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,4,2,128,1,float16,fp8,0,0.9181936264038086
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,4,4,128,1,float16,fp8,0,0.49486560821533204
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,4,2,128,1,fp8,fp8,0,0.9186863899230957
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,4,2,128,1,float16,float16,0,1.215398406982422
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,4,4,128,1,fp8,fp8,0,0.5118015766143799
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,4,4,128,1,float16,float16,0,0.8032112121582031
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,4,1,128,1,float16,float16,0,0.5625936031341553
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,4,1,128,1,float16,fp8,0,0.4952559947967529
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,4,1,128,1,fp8,fp8,0,0.4949183940887451
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,4,2,128,1,float16,fp8,0,0.494049596786499
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,4,2,128,1,float16,float16,0,0.6403584003448486
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,4,2,128,1,fp8,fp8,0,0.4928703784942627
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,4,4,128,1,float16,float16,0,0.44828319549560547
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,4,4,128,1,float16,fp8,0,0.3093951940536499
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,4,4,128,1,fp8,fp8,0,0.282204794883728
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,4,1,128,1,float16,fp8,0,0.28135199546813966
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,4,1,128,1,float16,float16,0,0.3137376070022583
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,4,1,128,1,fp8,fp8,0,0.2797024011611938
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,4,2,128,1,float16,float16,0,0.3564703941345215
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,4,2,128,1,float16,fp8,0,0.28111839294433594
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,4,2,128,1,fp8,fp8,0,0.2814032077789307
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,4,4,128,1,float16,float16,0,0.26713759899139405
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,4,4,128,1,float16,fp8,0,0.17708640098571776
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,4,4,128,1,fp8,fp8,0,0.1765887975692749
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,4,2,128,1,fp8,fp8,0,0.17826399803161622
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,4,1,128,1,float16,float16,0,0.19429440498352052
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,4,1,128,1,float16,fp8,0,0.17603039741516113
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,4,1,128,1,fp8,fp8,0,0.17534079551696777
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,4,2,128,1,float16,float16,0,0.2067647933959961
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,4,2,128,1,float16,fp8,0,0.17524960041046142
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,4,4,128,1,float16,float16,0,0.1443120002746582
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,4,4,128,1,float16,fp8,0,0.11203839778900146
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,4,4,128,1,fp8,fp8,0,0.11225279569625854
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,4,1,128,1,float16,float16,0,0.12586400508880616
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,4,1,128,1,float16,fp8,0,0.1116752028465271
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,4,1,128,1,fp8,fp8,0,0.11110719442367553
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,4,2,128,1,float16,float16,0,0.13055360317230225
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,4,2,128,1,float16,fp8,0,0.11165759563446045
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,4,2,128,1,fp8,fp8,0,0.11179200410842896
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,4,1,128,1,float16,float16,0,0.6506976127624512
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,4,1,128,1,float16,fp8,0,0.5711567878723145
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,4,1,128,1,fp8,fp8,0,0.5742720127105713
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,4,2,128,1,float16,fp8,0,0.5731743812561035
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,4,2,128,1,float16,float16,0,0.7719632148742676
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,4,2,128,1,fp8,fp8,0,0.5723728179931641
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,4,4,128,1,float16,float16,0,0.5342624187469482
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,4,4,128,1,float16,fp8,0,0.3146399974822998
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,4,4,128,1,fp8,fp8,0,0.31424798965454104
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,4,1,128,1,float16,float16,0,0.3559904098510742
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,4,1,128,1,float16,fp8,0,0.3140575885772705
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,4,1,128,1,fp8,fp8,0,0.31283841133117674
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,4,4,128,1,float16,float16,0,0.29721760749816895
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,4,2,128,1,float16,float16,0,0.4142127990722656
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,4,2,128,1,float16,fp8,0,0.3133120059967041
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,4,2,128,1,fp8,fp8,0,0.31303839683532714
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,4,4,128,1,float16,fp8,0,0.18421119451522827
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,4,4,128,1,fp8,fp8,0,0.18308639526367188
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,4,1,128,1,float16,float16,0,0.19742079973220825
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,4,1,128,1,float16,fp8,0,0.1813264012336731
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,4,1,128,1,fp8,fp8,0,0.18252320289611818
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,4,2,128,1,float16,float16,0,0.23334240913391113
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,4,2,128,1,float16,fp8,0,0.1821328043937683
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,4,2,128,1,fp8,fp8,0,0.18351199626922607
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,4,4,128,1,float16,float16,0,0.1656159996986389
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,4,1,128,1,fp8,fp8,0,0.11711039543151855
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,4,4,128,1,float16,fp8,0,0.11648160219192505
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,4,4,128,1,fp8,fp8,0,0.11585119962692261
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,4,2,128,1,fp8,fp8,0,0.1173856019973755
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,4,1,128,1,float16,float16,0,0.13019520044326782
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,4,1,128,1,float16,fp8,0,0.11737920045852661
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,4,2,128,1,float16,float16,0,0.14019039869308472
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,4,2,128,1,float16,fp8,0,0.11745920181274414
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,4,4,128,1,float16,float16,0,0.1130784034729004
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,4,4,128,1,float16,fp8,0,0.08628640174865723
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,4,4,128,1,fp8,fp8,0,0.0866047978401184
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,4,1,128,1,float16,fp8,0,0.08619040250778198
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,4,1,128,1,float16,float16,0,0.09840800166130066
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,4,1,128,1,fp8,fp8,0,0.08625919818878174
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,4,2,128,1,float16,float16,0,0.10251519680023194
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,4,2,128,1,float16,fp8,0,0.08632799983024597
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,4,2,128,1,fp8,fp8,0,0.08643040060997009
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,4,1,128,1,float16,float16,0,0.6494143962860107
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,4,1,128,1,float16,fp8,0,0.5707024097442627
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,4,1,128,1,fp8,fp8,0,0.569700813293457
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,4,2,128,1,float16,fp8,0,0.5721424102783204
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,4,2,128,1,float16,float16,0,0.8130831718444824
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,4,4,128,1,float16,fp8,0,0.30593280792236327
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,4,2,128,1,fp8,fp8,0,0.5706960201263428
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,4,4,128,1,float16,float16,0,0.5892864227294922
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,4,4,128,1,fp8,fp8,0,0.3060256004333496
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,4,1,128,1,float16,float16,0,0.3484208106994629
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,4,1,128,1,float16,fp8,0,0.3045504093170166
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,4,1,128,1,fp8,fp8,0,0.30739679336547854
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,4,2,128,1,float16,float16,0,0.42780160903930664
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,4,2,128,1,float16,fp8,0,0.30511519908905027
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,4,2,128,1,fp8,fp8,0,0.30484960079193113
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,4,4,128,1,float16,float16,0,0.3191920042037964
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,4,4,128,1,float16,fp8,0,0.17379039525985718
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,4,2,128,1,float16,fp8,0,0.17114720344543458
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,4,4,128,1,fp8,fp8,0,0.17351200580596923
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,4,1,128,1,float16,float16,0,0.1958832025527954
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,4,4,128,1,float16,fp8,0,0.10684000253677368
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,4,1,128,1,float16,fp8,0,0.17327040433883667
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,4,1,128,1,fp8,fp8,0,0.17092959880828856
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,4,2,128,1,float16,float16,0,0.23873438835144042
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,4,2,128,1,fp8,fp8,0,0.17182400226593017
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,4,4,128,1,float16,float16,0,0.18417760133743286
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,4,4,128,1,fp8,fp8,0,0.10745439529418946
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,4,1,128,1,float16,float16,0,0.1176144003868103
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,4,1,128,1,float16,fp8,0,0.10701279640197754
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,4,4,128,1,float16,fp8,0,0.06666079759597779
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,4,1,128,1,fp8,fp8,0,0.10815199613571166
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,4,2,128,1,float16,float16,0,0.1346575975418091
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,4,2,128,1,float16,fp8,0,0.10702879428863525
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,4,2,128,1,fp8,fp8,0,0.10667680501937866
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,4,4,128,1,float16,float16,0,0.0964031994342804
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,4,4,128,1,fp8,fp8,0,0.06582559943199158
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,4,1,128,1,float16,float16,0,0.07652159929275512
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,4,1,128,1,float16,fp8,0,0.06650559902191162
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,4,4,128,1,float16,fp8,0,0.06072160005569458
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,4,1,128,1,fp8,fp8,0,0.06620960235595703
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,4,2,128,1,float16,float16,0,0.08292160034179688
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,4,2,128,1,float16,fp8,0,0.06579520106315613
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,4,2,128,1,fp8,fp8,0,0.06573600172996522
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,4,4,128,1,float16,float16,0,0.08015040159225464
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,4,4,128,1,fp8,fp8,0,0.06025599837303162
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,4,1,128,1,float16,float16,0,0.0699184000492096
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,4,1,128,1,float16,fp8,0,0.05964319705963135
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,4,1,128,1,fp8,fp8,0,0.0605791985988617
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,4,2,128,1,float16,float16,0,0.07379199862480164
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,4,2,128,1,float16,fp8,0,0.0608959972858429
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,4,1,128,1,fp8,fp8,0,0.36909120082855223
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,4,2,128,1,fp8,fp8,0,0.059806400537490846
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,4,1,128,1,float16,float16,0,0.4213871955871582
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,4,1,128,1,float16,fp8,0,0.369483208656311
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,4,4,128,1,float16,float16,0,0.41163039207458496
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,4,2,128,1,float16,float16,0,0.541099214553833
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,4,2,128,1,float16,fp8,0,0.37123680114746094
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,4,4,128,1,float16,fp8,0,0.20261600017547607
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,4,2,128,1,fp8,fp8,0,0.37088799476623535
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,4,4,128,1,fp8,fp8,0,0.202128005027771
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,4,1,128,1,float16,float16,0,0.23219680786132812
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,4,1,128,1,float16,fp8,0,0.20063040256500245
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,4,1,128,1,fp8,fp8,0,0.20112960338592528
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,4,4,128,1,float16,fp8,0,0.11673120260238648
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,4,2,128,1,float16,float16,0,0.29109599590301516
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,4,1,128,1,float16,float16,0,0.12965760231018067
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,4,2,128,1,float16,fp8,0,0.20279200077056886
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,4,2,128,1,fp8,fp8,0,0.2013535976409912
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,4,4,128,1,float16,float16,0,0.22696640491485595
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,4,4,128,1,fp8,fp8,0,0.11664799451828003
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,4,1,128,1,float16,fp8,0,0.11550559997558593
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,4,1,128,1,fp8,fp8,0,0.11595040559768677
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,4,2,128,1,float16,float16,0,0.16533440351486206
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,4,2,128,1,float16,fp8,0,0.11723519563674926
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,4,4,128,1,float16,float16,0,0.11994880437850952
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,4,2,128,1,fp8,fp8,0,0.11656160354614258
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,4,4,128,1,float16,fp8,0,0.07380959987640381
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,4,4,128,1,fp8,fp8,0,0.07387199997901917
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,4,1,128,1,float16,float16,0,0.08525279760360718
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,4,1,128,1,float16,fp8,0,0.07400799989700317
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,4,1,128,1,fp8,fp8,0,0.0739952027797699
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,4,2,128,1,float16,float16,0,0.09462559819221497
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,4,2,128,1,float16,fp8,0,0.0739952027797699
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,4,2,128,1,fp8,fp8,0,0.07377920150756836
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,4,4,128,1,float16,float16,0,0.07664160132408142
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,4,4,128,1,float16,fp8,0,0.05170239806175232
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,4,4,128,1,fp8,fp8,0,0.05154399871826172
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,4,1,128,1,float16,float16,0,0.06169599890708923
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,4,1,128,1,float16,fp8,0,0.051630401611328126
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,4,1,128,1,fp8,fp8,0,0.051475197076797485
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,4,2,128,1,float16,float16,0,0.06731520295143127
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,4,2,128,1,float16,fp8,0,0.05146880149841308
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,4,2,128,1,fp8,fp8,0,0.051744002103805545
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,4,1,128,1,fp8,fp8,0,0.04735200107097626
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,4,2,128,1,float16,fp8,0,0.048942399024963376
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,4,4,128,1,float16,float16,0,0.06513280272483826
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,4,4,128,1,float16,fp8,0,0.04756479859352112
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,4,4,128,1,fp8,fp8,0,0.04832000136375427
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,4,1,128,1,float16,float16,0,0.05751360058784485
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,4,1,128,1,float16,float16,0,0.4493120193481445
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,4,1,128,1,float16,fp8,0,0.04800159931182861
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,4,2,128,1,float16,float16,0,0.05987359881401062
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,4,2,128,1,fp8,fp8,0,0.04737440049648285
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,4,1,128,1,float16,fp8,0,0.39308159351348876
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,4,1,128,1,fp8,fp8,0,0.39444479942321775
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,4,2,128,1,float16,float16,0,0.611033582687378
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,4,2,128,1,float16,fp8,0,0.39240639209747313
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,4,2,128,1,fp8,fp8,0,0.39231200218200685
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,4,4,128,1,float16,fp8,0,0.20969440937042236
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,4,4,128,1,float16,float16,0,0.4832608222961426
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,4,4,128,1,fp8,fp8,0,0.2101680040359497
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,4,2,128,1,float16,float16,0,0.3217871904373169
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,4,1,128,1,float16,float16,0,0.24289441108703613
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,4,1,128,1,float16,fp8,0,0.21028480529785157
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,4,4,128,1,fp8,fp8,0,0.11883039474487304
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,4,1,128,1,fp8,fp8,0,0.2094127893447876
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,4,2,128,1,float16,fp8,0,0.20858399868011473
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,4,2,128,1,fp8,fp8,0,0.20936319828033448
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,4,2,128,1,float16,float16,0,0.1798416018486023
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,4,4,128,1,float16,float16,0,0.2589359998703003
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,4,4,128,1,float16,fp8,0,0.11771199703216553
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,4,1,128,1,float16,float16,0,0.13742079734802246
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,4,1,128,1,float16,fp8,0,0.11722079515457154
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,4,1,128,1,fp8,fp8,0,0.11647520065307618
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,4,2,128,1,float16,fp8,0,0.11582239866256713
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,4,2,128,1,fp8,fp8,0,0.11672639846801758
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,4,4,128,1,float16,float16,0,0.14356640577316285
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,4,4,128,1,float16,fp8,0,0.06987360119819641
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,4,4,128,1,fp8,fp8,0,0.06986879706382751
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,4,2,128,1,fp8,fp8,0,0.06998720169067382
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,4,1,128,1,float16,float16,0,0.0802191972732544
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,4,1,128,1,float16,fp8,0,0.06990399956703186
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,4,1,128,1,fp8,fp8,0,0.07005760073661804
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,4,2,128,1,float16,float16,0,0.09395359754562378
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,4,2,128,1,float16,fp8,0,0.06988319754600525
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,4,4,128,1,float16,float16,0,0.07213600277900696
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,4,4,128,1,float16,fp8,0,0.04339520037174225
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,4,4,128,1,fp8,fp8,0,0.04328159987926483
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,4,1,128,1,float16,float16,0,0.05229439735412598
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,4,1,128,1,float16,fp8,0,0.04323999881744385
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,4,1,128,1,fp8,fp8,0,0.04322879910469055
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,4,2,128,1,float16,float16,0,0.05883839726448059
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,4,1,128,1,float16,float16,0,0.04530239999294281
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,4,2,128,1,float16,fp8,0,0.043224000930786134
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,4,2,128,1,fp8,fp8,0,0.043243199586868286
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,4,4,128,1,float16,float16,0,0.055979198217391966
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,4,4,128,1,float16,fp8,0,0.037190398573875426
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,4,4,128,1,fp8,fp8,0,0.03743360042572021
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,4,1,128,1,float16,fp8,0,0.037364798784255984
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,4,1,128,1,fp8,fp8,0,0.03707039952278137
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,4,2,128,1,float16,float16,0,0.050203198194503786
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,4,2,128,1,float16,fp8,0,0.037136000394821164
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,4,2,128,1,fp8,fp8,0,0.037092798948287965
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,4,1,128,1,fp8,fp8,0,0.035128000378608706
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,4,4,128,1,float16,float16,0,0.04943679869174957
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,4,4,128,1,float16,fp8,0,0.03510720133781433
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,4,4,128,1,fp8,fp8,0,0.03506399989128113
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,4,1,128,1,float16,float16,0,0.04524320065975189
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,4,1,128,1,float16,fp8,0,0.03499200046062469
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,4,2,128,1,float16,float16,0,0.045212799310684205
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,4,2,128,1,float16,fp8,0,0.03505280017852783
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,4,2,128,1,fp8,fp8,0,0.0350816011428833
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,4,1,128,1,float16,float16,0,0.3089792013168335
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,4,1,128,1,float16,fp8,0,0.2661344051361084
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,4,1,128,1,fp8,fp8,0,0.2660432100296021
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,4,2,128,1,float16,float16,0,0.4286191940307617
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,4,4,128,1,fp8,fp8,0,0.1451024055480957
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,4,2,128,1,float16,fp8,0,0.26621758937835693
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,4,2,128,1,fp8,fp8,0,0.2668864011764526
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,4,4,128,1,float16,fp8,0,0.14491360187530516
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,4,4,128,1,float16,float16,0,0.35045599937438965
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,4,1,128,1,float16,float16,0,0.17183040380477904
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,4,1,128,1,float16,fp8,0,0.1434671998023987
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,4,1,128,1,fp8,fp8,0,0.14392640590667724
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,4,2,128,1,float16,float16,0,0.23099360466003419
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,4,2,128,1,float16,fp8,0,0.1439568042755127
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,4,2,128,1,fp8,fp8,0,0.14418400526046754
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,4,4,128,1,float16,float16,0,0.19113600254058838
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,4,4,128,1,float16,fp8,0,0.08196480274200439
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,4,4,128,1,fp8,fp8,0,0.08208799958229065
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,4,1,128,1,float16,float16,0,0.09260799884796142
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,4,1,128,1,float16,fp8,0,0.08154720067977905
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,4,1,128,1,fp8,fp8,0,0.08130239844322204
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,4,2,128,1,float16,float16,0,0.12698240280151368
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,4,2,128,1,float16,fp8,0,0.08228319883346558
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,4,2,128,1,fp8,fp8,0,0.08177760243415833
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,4,4,128,1,float16,float16,0,0.09486240148544312
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,4,4,128,1,float16,fp8,0,0.04967679977416992
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,4,4,128,1,fp8,fp8,0,0.05071359872817993
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,4,1,128,1,float16,float16,0,0.06107680201530456
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,4,1,128,1,float16,fp8,0,0.05085279941558838
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,4,1,128,1,fp8,fp8,0,0.05105760097503662
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,4,2,128,1,float16,float16,0,0.07290080189704895
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,4,2,128,1,float16,fp8,0,0.050072002410888675
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,4,1,128,1,float16,fp8,0,0.03517279922962189
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,4,2,128,1,fp8,fp8,0,0.04940159916877747
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,4,4,128,1,float16,float16,0,0.05941600203514099
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,4,4,128,1,float16,fp8,0,0.03481760025024414
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,4,4,128,1,fp8,fp8,0,0.03496800065040588
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,4,1,128,1,float16,float16,0,0.04359999895095825
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,4,1,128,1,fp8,fp8,0,0.03458240032196045
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,4,2,128,1,float16,fp8,0,0.03505760133266449
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,4,2,128,1,fp8,fp8,0,0.034913599491119385
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,4,2,128,1,float16,float16,0,0.049369600415229795
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,4,4,128,1,float16,float16,0,0.04730879962444305
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,4,4,128,1,float16,fp8,0,0.030953601002693176
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,4,4,128,1,fp8,fp8,0,0.031027200818061828
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,4,1,128,1,float16,float16,0,0.03919680118560791
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,4,1,128,1,float16,fp8,0,0.030884799361228944
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,4,2,128,1,float16,float16,0,0.043188801407814024
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,4,1,128,1,fp8,fp8,0,0.031014400720596313
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,4,2,128,1,float16,fp8,0,0.031071999669075014
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,4,2,128,1,fp8,fp8,0,0.030899199843406677
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,4,4,128,1,float16,fp8,0,0.028915199637413024
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,4,2,128,1,float16,fp8,0,0.028867200016975403
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,4,4,128,1,float16,float16,0,0.04320639967918396
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,4,4,128,1,fp8,fp8,0,0.028916800022125246
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,4,1,128,1,float16,float16,0,0.039155200123786926
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,4,1,128,1,float16,fp8,0,0.028841599822044373
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,4,1,128,1,fp8,fp8,0,0.02887679934501648
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,4,2,128,1,float16,float16,0,0.039113599061965945
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,4,2,128,1,fp8,fp8,0,0.02881760001182556
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,4,1,128,1,float16,float16,0,0.35294079780578613
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,4,1,128,1,float16,fp8,0,0.30357120037078855
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,4,1,128,1,fp8,fp8,0,0.3040143966674805
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,4,4,128,1,fp8,fp8,0,0.16083840131759644
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,4,2,128,1,float16,fp8,0,0.30269761085510255
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,4,2,128,1,float16,float16,0,0.5124959945678711
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,4,2,128,1,fp8,fp8,0,0.303273606300354
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,4,4,128,1,float16,fp8,0,0.16125439405441283
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,4,4,128,1,float16,float16,0,0.43011999130249023
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,4,1,128,1,float16,float16,0,0.19181920289993287
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,4,1,128,1,float16,fp8,0,0.16040480136871338
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,4,1,128,1,fp8,fp8,0,0.160206401348114
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,4,2,128,1,float16,float16,0,0.2704047918319702
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,4,2,128,1,float16,fp8,0,0.16059520244598388
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,4,2,128,1,fp8,fp8,0,0.16042239665985109
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,4,4,128,1,float16,float16,0,0.228003191947937
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,4,4,128,1,float16,fp8,0,0.08906880021095276
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,4,4,128,1,fp8,fp8,0,0.08892319798469543
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,4,1,128,1,float16,float16,0,0.10670720338821411
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,4,4,128,1,float16,float16,0,0.12399040460586548
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,4,1,128,1,float16,fp8,0,0.08697440028190613
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,4,1,128,1,fp8,fp8,0,0.0870191991329193
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,4,2,128,1,float16,float16,0,0.1497056007385254
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,4,2,128,1,float16,fp8,0,0.08825759887695313
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,4,2,128,1,fp8,fp8,0,0.08773120045661927
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,4,4,128,1,float16,fp8,0,0.0514415979385376
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,4,4,128,1,fp8,fp8,0,0.05192480087280273
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,4,1,128,1,float16,float16,0,0.06064959764480591
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,4,1,128,1,float16,fp8,0,0.051419198513031006
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,4,1,128,1,fp8,fp8,0,0.05143839716911316
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,4,2,128,1,float16,float16,0,0.07645919919013977
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,4,2,128,1,float16,fp8,0,0.0514847993850708
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,4,2,128,1,fp8,fp8,0,0.05145599842071533
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,4,4,128,1,float16,float16,0,0.05991680026054382
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,4,4,128,1,float16,fp8,0,0.031001600623130798
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,4,4,128,1,fp8,fp8,0,0.03097760081291199
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,4,1,128,1,float16,float16,0,0.04071039855480194
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,4,1,128,1,float16,fp8,0,0.030883198976516722
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,4,1,128,1,fp8,fp8,0,0.030963200330734252
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,4,2,128,1,float16,float16,0,0.04558559954166412
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,4,1,128,1,float16,float16,0,0.03344480097293854
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,4,2,128,1,float16,fp8,0,0.03100320100784302
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,4,2,128,1,fp8,fp8,0,0.031091201305389404
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,4,4,128,1,float16,float16,0,0.04362399876117706
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,4,4,128,1,float16,fp8,0,0.026023998856544495
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,4,4,128,1,fp8,fp8,0,0.026590400934219362
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,4,1,128,1,float16,fp8,0,0.025916799902915955
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,4,1,128,1,fp8,fp8,0,0.025135999917984007
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,4,2,128,1,float16,float16,0,0.037615999579429626
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,4,4,128,1,fp8,fp8,0,0.02276960015296936
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,4,2,128,1,float16,fp8,0,0.02682720124721527
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,4,2,128,1,fp8,fp8,0,0.025135999917984007
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,4,4,128,1,float16,float16,0,0.03710559904575348
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,4,4,128,1,float16,fp8,0,0.02280000001192093
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,4,1,128,1,float16,float16,0,0.03296639919281006
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,4,1,128,1,float16,fp8,0,0.022867199778556824
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,4,1,128,1,fp8,fp8,0,0.022843199968338012
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,4,2,128,1,float16,float16,0,0.03301759958267212
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,4,2,128,1,float16,fp8,0,0.023083199560642243
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,4,2,128,1,fp8,fp8,0,0.02311840057373047
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,4,4,128,1,float16,float16,0,0.032996800541877744
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,4,4,128,1,float16,fp8,0,0.022777600586414336
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,4,4,128,1,fp8,fp8,0,0.022777600586414336
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,4,1,128,1,float16,float16,0,0.03295519948005676
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,4,1,128,1,float16,fp8,0,0.022703999280929567
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,4,1,128,1,fp8,fp8,0,0.022726400196552275
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,4,2,128,1,float16,float16,0,0.033032000064849854
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,4,2,128,1,float16,fp8,0,0.02280000001192093
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,4,2,128,1,fp8,fp8,0,0.022711999714374542
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,4,1,128,1,float16,float16,0,0.3082927942276001
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,4,1,128,1,float16,fp8,0,0.2587104082107544
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,4,1,128,1,fp8,fp8,0,0.2585664033889771
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,4,2,128,1,float16,fp8,0,0.258734393119812
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,4,2,128,1,float16,float16,0,0.46643838882446287
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,4,2,128,1,fp8,fp8,0,0.2581279993057251
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,4,4,128,1,float16,fp8,0,0.13720639944076538
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,4,4,128,1,float16,float16,0,0.40194239616394045
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,4,4,128,1,fp8,fp8,0,0.13693599700927733
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,4,1,128,1,float16,fp8,0,0.13596639633178711
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,4,1,128,1,float16,float16,0,0.16671680212020873
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,4,1,128,1,fp8,fp8,0,0.13639999628067018
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,4,2,128,1,float16,fp8,0,0.13569600582122804
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,4,2,128,1,float16,float16,0,0.24623680114746094
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,4,2,128,1,fp8,fp8,0,0.13682559728622437
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,4,4,128,1,float16,float16,0,0.21317760944366454
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,4,4,128,1,float16,fp8,0,0.07501279711723327
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,4,4,128,1,fp8,fp8,0,0.07509599924087525
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,4,1,128,1,float16,float16,0,0.0928816020488739
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,4,4,128,1,float16,float16,0,0.11475679874420167
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,4,4,128,1,float16,fp8,0,0.04296959936618805
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,4,1,128,1,float16,fp8,0,0.07387040257453918
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,4,1,128,1,fp8,fp8,0,0.0737824022769928
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,4,2,128,1,float16,float16,0,0.13586080074310303
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,4,2,128,1,float16,fp8,0,0.07400000095367432
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,4,2,128,1,fp8,fp8,0,0.07398719787597656
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,4,4,128,1,fp8,fp8,0,0.043244799971580504
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,4,4,128,1,float16,float16,0,0.05384799838066101
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,4,1,128,1,float16,float16,0,0.05134559869766235
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,4,4,128,1,fp8,fp8,0,0.02486719936132431
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,4,1,128,1,float16,fp8,0,0.042691200971603394
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,4,1,128,1,fp8,fp8,0,0.04328800141811371
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,4,2,128,1,float16,float16,0,0.0698095977306366
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,4,2,128,1,float16,fp8,0,0.043171200156211856
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,4,2,128,1,fp8,fp8,0,0.043049600720405576
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,4,4,128,1,float16,fp8,0,0.024820800125598907
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,4,1,128,1,float16,float16,0,0.03374559879302978
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,4,1,128,1,float16,fp8,0,0.02476799935102463
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,4,1,128,1,fp8,fp8,0,0.024831999838352204
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,4,2,128,1,float16,float16,0,0.03994719982147217
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,4,2,128,1,float16,fp8,0,0.0247871994972229
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,4,2,128,1,fp8,fp8,0,0.024855999648571013
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,4,4,128,1,float16,float16,0,0.038555198907852174
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,4,4,128,1,float16,fp8,0,0.020744000375270844
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,4,4,128,1,fp8,fp8,0,0.020633600652217865
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,4,1,128,1,float16,float16,0,0.027315199375152588
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,4,1,128,1,float16,fp8,0,0.020820799469947814
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,4,1,128,1,fp8,fp8,0,0.02067359983921051
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,4,2,128,1,float16,float16,0,0.03296639919281006
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,4,2,128,1,float16,fp8,0,0.020768000185489653
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,4,2,128,1,fp8,fp8,0,0.02067999988794327
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,4,4,128,1,float16,float16,0,0.03187519907951355
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,4,4,128,1,float16,fp8,0,0.017828799784183502
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,4,4,128,1,fp8,fp8,0,0.017574399709701538
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,4,1,128,1,float16,float16,0,0.026830399036407472
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,4,1,128,1,float16,fp8,0,0.018670399487018586
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,4,1,128,1,fp8,fp8,0,0.01722559928894043
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,4,2,128,1,float16,float16,0,0.0268095999956131
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,4,2,128,1,float16,fp8,0,0.01854719966650009
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,4,2,128,1,fp8,fp8,0,0.017398400604724883
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,4,4,128,1,float16,float16,0,0.02727360129356384
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,4,2,128,1,float16,fp8,0,0.016568000614643096
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,4,4,128,1,float16,fp8,0,0.01656000018119812
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,4,4,128,1,fp8,fp8,0,0.016631999611854555
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,4,1,128,1,float16,float16,0,0.02688960134983063
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,4,1,128,1,float16,fp8,0,0.01664000004529953
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,4,1,128,1,fp8,fp8,0,0.016540800034999848
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,4,2,128,1,float16,float16,0,0.026774400472640993
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,4,2,128,1,fp8,fp8,0,0.01681919991970062
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,4,4,128,1,float16,float16,0,0.026841598749160766
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,4,4,128,1,float16,fp8,0,0.016604800522327424
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,4,4,128,1,fp8,fp8,0,0.016607999801635742
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,4,1,128,1,float16,float16,0,0.02688960134983063
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,4,1,128,1,float16,fp8,0,0.016539199650287627
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,4,1,128,1,fp8,fp8,0,0.016518400609493257
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,4,2,128,1,float16,float16,0,0.02683199942111969
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,4,2,128,1,float16,fp8,0,0.016502399742603303
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,4,2,128,1,fp8,fp8,0,0.016577599942684172
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,4,1,128,1,float16,float16,0,0.1496608018875122
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,4,1,128,1,float16,fp8,0,0.11947519779205322
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,4,1,128,1,fp8,fp8,0,0.11942239999771118
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,4,2,128,1,float16,float16,0,0.22782559394836427
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,4,2,128,1,float16,fp8,0,0.11987199783325195
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,4,1,128,1,float16,float16,0,0.08421120047569275
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,4,2,128,1,fp8,fp8,0,0.12062239646911621
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,4,4,128,1,float16,float16,0,0.20445120334625244
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,4,4,128,1,float16,fp8,0,0.06729919910430908
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,4,4,128,1,fp8,fp8,0,0.06774719953536987
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,4,1,128,1,float16,fp8,0,0.06572960019111633
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,4,1,128,1,fp8,fp8,0,0.06581439971923828
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,4,2,128,1,float16,float16,0,0.12740800380706788
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,4,2,128,1,float16,fp8,0,0.06577439904212952
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,4,2,128,1,fp8,fp8,0,0.06570879817008972
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,4,4,128,1,float16,float16,0,0.10864479541778564
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,4,4,128,1,float16,fp8,0,0.03722079992294312
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,4,4,128,1,fp8,fp8,0,0.037031999230384825
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,4,1,128,1,float16,float16,0,0.04529759883880615
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,4,1,128,1,float16,fp8,0,0.03698239922523498
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,4,1,128,1,fp8,fp8,0,0.03701440095901489
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,4,2,128,1,float16,float16,0,0.058740800619125365
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,4,2,128,1,float16,fp8,0,0.0370608001947403
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,4,2,128,1,fp8,fp8,0,0.037143999338150026
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,4,4,128,1,float16,float16,0,0.051470398902893066
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,4,4,128,1,float16,fp8,0,0.022819200158119203
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,4,4,128,1,fp8,fp8,0,0.022720000147819518
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,4,4,128,1,float16,float16,0,0.03661440014839172
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,4,1,128,1,float16,float16,0,0.03131999969482422
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,4,1,128,1,float16,fp8,0,0.022697600722312927
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,4,1,128,1,fp8,fp8,0,0.02263679951429367
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,4,2,128,1,float16,float16,0,0.037124800682067874
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,4,2,128,1,float16,fp8,0,0.022737599909305573
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,4,2,128,1,fp8,fp8,0,0.02282560020685196
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,4,4,128,1,float16,fp8,0,0.01663520038127899
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,4,4,128,1,fp8,fp8,0,0.01658719927072525
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,4,1,128,1,float16,float16,0,0.024991999566555022
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,4,1,128,1,float16,fp8,0,0.016667200624942778
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,4,1,128,1,fp8,fp8,0,0.017127999663352968
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,4,2,128,1,float16,float16,0,0.028815999627113342
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,4,2,128,1,float16,fp8,0,0.01672160029411316
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,4,2,128,1,fp8,fp8,0,0.01668799966573715
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,4,4,128,1,float16,float16,0,0.028867200016975403
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,4,4,128,1,float16,fp8,0,0.014609600603580474
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,4,4,128,1,fp8,fp8,0,0.014620800316333771
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,4,1,128,1,float16,float16,0,0.024775999784469604
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,4,1,128,1,float16,fp8,0,0.014575999975204468
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,4,1,128,1,fp8,fp8,0,0.014798399806022645
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,4,2,128,1,float16,float16,0,0.02476000040769577
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,4,2,128,1,float16,fp8,0,0.014585599303245544
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,4,2,128,1,fp8,fp8,0,0.01459999978542328
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,4,4,128,1,float16,float16,0,0.024959999322891235
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,4,4,128,1,float16,fp8,0,0.014486399292945863
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,4,4,128,1,fp8,fp8,0,0.014497600495815277
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,4,1,128,1,float16,float16,0,0.02346719950437546
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,4,1,128,1,float16,fp8,0,0.014591999351978302
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,4,4,128,1,fp8,fp8,0,0.012625600397586822
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,4,1,128,1,fp8,fp8,0,0.014523200690746307
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,4,2,128,1,float16,float16,0,0.023100799322128295
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,4,2,128,1,float16,fp8,0,0.01446239948272705
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,4,2,128,1,fp8,fp8,0,0.014550399780273438
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,4,4,128,1,float16,float16,0,0.024700799584388734
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,4,4,128,1,float16,fp8,0,0.012558400630950928
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,4,1,128,1,float16,float16,0,0.02292959988117218
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,4,1,128,1,float16,fp8,0,0.012614400684833526
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,4,1,128,1,fp8,fp8,0,0.01257600039243698
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,4,2,128,1,float16,float16,0,0.02272319942712784
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,4,1,128,1,float16,fp8,0,0.012617599964141846
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,4,2,128,1,float16,fp8,0,0.013659200072288514
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,4,2,128,1,fp8,fp8,0,0.012564800679683685
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,4,4,128,1,float16,float16,0,0.02266400009393692
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,4,4,128,1,float16,fp8,0,0.012467200309038163
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,4,4,128,1,fp8,fp8,0,0.012667199969291687
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,4,1,128,1,float16,float16,0,0.02266560047864914
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,4,1,128,1,fp8,fp8,0,0.012478400021791458
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,4,2,128,1,float16,float16,0,0.02269279956817627
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,4,2,128,1,float16,fp8,0,0.012532800436019897
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,4,2,128,1,fp8,fp8,0,0.012495999783277511
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,4,1,128,1,float16,float16,0,0.08994240164756775
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,4,1,128,1,float16,fp8,0,0.06785280108451844
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,4,1,128,1,fp8,fp8,0,0.06852800250053406
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,4,2,128,1,float16,fp8,0,0.06857280135154724
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,4,2,128,1,float16,float16,0,0.13097599744796753
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,4,2,128,1,fp8,fp8,0,0.06865599751472473
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,4,4,128,1,float16,float16,0,0.11124639511108399
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,4,4,128,1,float16,fp8,0,0.0392767995595932
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,4,4,128,1,fp8,fp8,0,0.03912320137023926
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,4,1,128,1,float16,float16,0,0.0473904013633728
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,4,1,128,1,float16,fp8,0,0.03915840089321136
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,4,1,128,1,fp8,fp8,0,0.03909280002117157
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,4,2,128,1,float16,float16,0,0.06447200179100036
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,4,2,128,1,float16,fp8,0,0.03919680118560791
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,4,2,128,1,fp8,fp8,0,0.03911519944667816
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,4,4,128,1,float16,float16,0,0.05216959714889526
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,4,4,128,1,float16,fp8,0,0.022935999929904936
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,4,4,128,1,fp8,fp8,0,0.022728000581264497
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,4,1,128,1,float16,float16,0,0.033036801218986514
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,4,1,128,1,float16,fp8,0,0.02279040068387985
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,4,1,128,1,fp8,fp8,0,0.022899200022220612
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,4,2,128,1,float16,float16,0,0.03889279961585999
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,4,2,128,1,float16,fp8,0,0.02284960001707077
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,4,1,128,1,float16,fp8,0,0.014672000706195832
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,4,2,128,1,fp8,fp8,0,0.022940799593925476
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,4,4,128,1,float16,float16,0,0.033995199203491214
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,4,4,128,1,float16,fp8,0,0.014591999351978302
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,4,4,128,1,fp8,fp8,0,0.014699199795722961
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,4,1,128,1,float16,float16,0,0.02274879962205887
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,4,1,128,1,fp8,fp8,0,0.014644800126552582
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,4,4,128,1,fp8,fp8,0,0.012643200159072877
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,4,2,128,1,float16,float16,0,0.02884800136089325
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,4,2,128,1,float16,fp8,0,0.014616000652313232
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,4,2,128,1,fp8,fp8,0,0.014692799746990204
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,4,4,128,1,float16,float16,0,0.02608479857444763
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,4,4,128,1,float16,fp8,0,0.012508800625801087
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,4,1,128,1,float16,float16,0,0.022755199670791627
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,4,1,128,1,float16,fp8,0,0.012654399871826172
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,4,1,128,1,fp8,fp8,0,0.01241919994354248
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,4,2,128,1,float16,float16,0,0.022622400522232057
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,4,2,128,1,float16,fp8,0,0.01247519999742508
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,4,2,128,1,fp8,fp8,0,0.012409599870443344
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,4,4,128,1,float16,float16,0,0.020849600434303284
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,4,4,128,1,float16,fp8,0,0.010547199845314026
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,4,1,128,1,float16,float16,0,0.02080959975719452
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,4,4,128,1,float16,float16,0,0.02067520022392273
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,4,4,128,1,fp8,fp8,0,0.010447999835014344
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,4,1,128,1,float16,fp8,0,0.011425600200891495
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,4,1,128,1,fp8,fp8,0,0.010486400127410889
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,4,2,128,1,float16,float16,0,0.020659199357032774
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,4,2,128,1,float16,fp8,0,0.010595200210809707
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,4,2,128,1,fp8,fp8,0,0.010476800054311753
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,4,4,128,1,float16,fp8,0,0.010467199981212616
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,4,4,128,1,fp8,fp8,0,0.010412800312042236
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,4,1,128,1,float16,float16,0,0.02067680060863495
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,4,1,128,1,float16,fp8,0,0.010496000200510025
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,4,1,128,1,fp8,fp8,0,0.010540799796581268
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,4,2,128,1,float16,float16,0,0.020747199654579163
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,4,1,128,1,float16,fp8,0,0.0104032002389431
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,4,2,128,1,float16,fp8,0,0.010475199669599533
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,4,2,128,1,fp8,fp8,0,0.010496000200510025
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,4,2,128,1,float16,fp8,0,0.010391999781131745
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,4,4,128,1,float16,float16,0,0.01889120042324066
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,4,4,128,1,float16,fp8,0,0.010382399708032609
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,4,4,128,1,fp8,fp8,0,0.010470400005578995
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,4,1,128,1,float16,float16,0,0.020716799795627593
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,4,1,128,1,fp8,fp8,0,0.010518400371074677
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,4,2,128,1,float16,float16,0,0.020732800662517547
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,4,2,128,1,fp8,fp8,0,0.010382399708032609
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,4,4,128,1,float16,float16,0,0.01876640021800995
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,4,4,128,1,float16,fp8,0,0.010503999888896942
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,4,4,128,1,fp8,fp8,0,0.010462400317192078
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,4,1,128,1,float16,float16,0,0.01868959963321686
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,4,1,128,1,float16,float16,0,0.05911039710044861
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,4,1,128,1,float16,fp8,0,0.01043040007352829
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,4,1,128,1,fp8,fp8,0,0.010393600165843963
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,4,2,128,1,float16,float16,0,0.019972799718379973
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,4,2,128,1,float16,fp8,0,0.010590399801731109
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,4,2,128,1,fp8,fp8,0,0.01040000021457672
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,4,1,128,1,float16,fp8,0,0.04735519886016846
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,4,1,128,1,fp8,fp8,0,0.04737600088119507
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,4,2,128,1,float16,float16,0,0.07477120161056519
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,4,2,128,1,float16,fp8,0,0.04736959934234619
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,4,1,128,1,fp8,fp8,0,0.0288783997297287
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,4,2,128,1,fp8,fp8,0,0.04733439981937408
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,4,2,128,1,float16,fp8,0,0.029017600417137145
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,4,4,128,1,float16,float16,0,0.05830720067024231
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,4,4,128,1,float16,fp8,0,0.028896000981330872
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,4,4,128,1,fp8,fp8,0,0.02881920039653778
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,4,1,128,1,float16,float16,0,0.03929600119590759
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,4,1,128,1,float16,float16,0,0.026791998744010927
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,4,1,128,1,float16,fp8,0,0.028881600499153136
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,4,2,128,1,float16,float16,0,0.045316800475120544
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,4,2,128,1,fp8,fp8,0,0.02884480059146881
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,4,2,128,1,float16,fp8,0,0.01849440038204193
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,4,4,128,1,float16,float16,0,0.0372624009847641
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,4,4,128,1,float16,fp8,0,0.01847680062055588
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,4,4,128,1,fp8,fp8,0,0.018694399297237395
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,4,1,128,1,float16,fp8,0,0.0186256006360054
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,4,1,128,1,fp8,fp8,0,0.018673600256443025
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,4,2,128,1,float16,float16,0,0.031062400341033934
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,4,2,128,1,fp8,fp8,0,0.01868640035390854
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,4,4,128,1,float16,float16,0,0.026974400877952574
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,4,4,128,1,float16,fp8,0,0.012484800070524216
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,4,4,128,1,fp8,fp8,0,0.012435200065374375
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,4,1,128,1,float16,float16,0,0.02282239943742752
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,4,1,128,1,float16,fp8,0,0.012571200728416443
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,4,1,128,1,fp8,fp8,0,0.012516799569129943
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,4,2,128,1,float16,float16,0,0.02273920029401779
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,4,2,128,1,float16,fp8,0,0.012459199875593185
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,4,2,128,1,fp8,fp8,0,0.012460800260305405
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,4,4,128,1,float16,float16,0,0.020824000239372253
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,4,4,128,1,float16,fp8,0,0.010497599840164185
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,4,4,128,1,fp8,fp8,0,0.0104032002389431
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,4,1,128,1,float16,float16,0,0.020795199275016784
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,4,1,128,1,float16,fp8,0,0.010422399640083313
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,4,1,128,1,fp8,fp8,0,0.01048479974269867
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,4,4,128,1,float16,fp8,0,0.010407999902963639
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,4,2,128,1,float16,float16,0,0.020745599269866945
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,4,2,128,1,float16,fp8,0,0.01050880029797554
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,4,1,128,1,fp8,fp8,0,0.010407999902963639
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,4,2,128,1,fp8,fp8,0,0.010521599650382995
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,4,4,128,1,float16,float16,0,0.020623999834060668
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,4,4,128,1,fp8,fp8,0,0.010406400263309478
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,4,1,128,1,float16,float16,0,0.020708799362182617
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,4,1,128,1,float16,fp8,0,0.010424000024795533
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,4,2,128,1,float16,float16,0,0.020628799498081208
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,4,2,128,1,float16,fp8,0,0.01040000021457672
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,4,2,128,1,fp8,fp8,0,0.010387200117111205
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,4,4,128,1,float16,float16,0,0.020665599405765532
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,4,4,128,1,float16,fp8,0,0.01037919968366623
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,4,4,128,1,fp8,fp8,0,0.010444799810647965
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,4,1,128,1,float16,float16,0,0.018606400489807128
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,4,1,128,1,float16,fp8,0,0.010396800190210342
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,4,1,128,1,fp8,fp8,0,0.009495999664068222
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,4,2,128,1,float16,float16,0,0.020601600408554077
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,4,2,128,1,fp8,fp8,0,0.010355199873447418
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,4,2,128,1,float16,fp8,0,0.010384000092744827
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,4,4,128,1,float16,float16,0,0.018688000738620758
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,4,4,128,1,float16,fp8,0,0.009001599997282029
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,4,4,128,1,fp8,fp8,0,0.010380800068378448
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,4,1,128,1,float16,float16,0,0.018718400597572328
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,4,1,128,1,float16,fp8,0,0.010331200063228607
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,4,4,128,1,float16,fp8,0,0.010289599746465683
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,4,1,128,1,fp8,fp8,0,0.00841279998421669
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,4,2,128,1,float16,float16,0,0.018956799805164338
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,4,2,128,1,float16,fp8,0,0.010388799756765366
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,4,1,128,1,fp8,fp8,0,0.009654399752616883
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,4,2,128,1,fp8,fp8,0,0.009060800075531006
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,4,2,128,1,float16,float16,0,0.018775999546051025
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,4,2,128,1,fp8,fp8,0,0.009932799637317658
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,4,4,128,1,float16,float16,0,0.01865600049495697
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,4,4,128,1,fp8,fp8,0,0.010363200306892395
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,4,1,128,1,float16,float16,0,0.018592000007629395
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,4,1,128,1,float16,fp8,0,0.008432000130414962
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,4,2,128,1,float16,fp8,0,0.0373744010925293
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,4,2,128,1,float16,fp8,0,0.010281600058078766
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,4,1,128,1,float16,float16,0,0.05043839812278748
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,4,1,128,1,fp8,fp8,0,0.03748959898948669
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,4,1,128,1,float16,fp8,0,0.03721120059490204
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,4,2,128,1,float16,float16,0,0.05628160238265991
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,4,1,128,1,fp8,fp8,0,0.02279040068387985
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,4,2,128,1,fp8,fp8,0,0.03734880089759827
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,4,4,128,1,float16,float16,0,0.04347839951515198
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,4,4,128,1,float16,fp8,0,0.023625600337982177
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,4,4,128,1,fp8,fp8,0,0.02274399995803833
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,4,1,128,1,float16,float16,0,0.033129599690437314
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,4,1,128,1,float16,fp8,0,0.023345600068569183
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,4,2,128,1,float16,float16,0,0.037212800979614255
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,4,1,128,1,float16,fp8,0,0.015084800124168397
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,4,2,128,1,float16,fp8,0,0.022835199534893037
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,4,2,128,1,fp8,fp8,0,0.022780799865722658
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,4,4,128,1,float16,float16,0,0.03086079955101013
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,4,4,128,1,float16,fp8,0,0.014641599357128143
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,4,4,128,1,fp8,fp8,0,0.014603200554847717
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,4,1,128,1,float16,float16,0,0.02521440088748932
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,4,1,128,1,fp8,fp8,0,0.014585599303245544
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,4,2,128,1,float16,float16,0,0.026929599046707154
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,4,2,128,1,float16,fp8,0,0.014838400483131408
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,4,2,128,1,fp8,fp8,0,0.014689600467681885
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,4,4,128,1,float16,float16,0,0.022668799757957457
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,4,4,128,1,float16,fp8,0,0.010590399801731109
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,4,4,128,1,fp8,fp8,0,0.010486400127410889
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,4,1,128,1,float16,float16,0,0.021294400095939636
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,4,4,128,1,float16,fp8,0,0.010409600287675857
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,4,1,128,1,float16,fp8,0,0.010446400195360184
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,4,1,128,1,fp8,fp8,0,0.010582400113344192
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,4,2,128,1,float16,float16,0,0.022720000147819518
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,4,2,128,1,float16,fp8,0,0.010407999902963639
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,4,2,128,1,fp8,fp8,0,0.010915199667215348
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,4,4,128,1,float16,float16,0,0.02075359970331192
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,4,4,128,1,fp8,fp8,0,0.010420800000429154
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,4,1,128,1,float16,float16,0,0.020712000131607056
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,4,1,128,1,float16,fp8,0,0.010444799810647965
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,4,1,128,1,fp8,fp8,0,0.010486400127410889
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,4,2,128,1,float16,float16,0,0.020865599811077117
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,4,2,128,1,float16,fp8,0,0.010414399951696397
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,4,2,128,1,fp8,fp8,0,0.010417599976062775
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,4,4,128,1,float16,float16,0,0.020641599595546723
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,4,4,128,1,float16,fp8,0,0.008851200342178345
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,4,4,128,1,fp8,fp8,0,0.010452800244092942
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,4,1,128,1,float16,float16,0,0.018676799535751343
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,4,1,128,1,float16,fp8,0,0.008479999750852585
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,4,1,128,1,fp8,fp8,0,0.009440000355243682
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,4,2,128,1,float16,float16,0,0.018719999492168425
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,4,2,128,1,float16,fp8,0,0.010409600287675857
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,4,2,128,1,fp8,fp8,0,0.010344000160694122
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,4,4,128,1,float16,float16,0,0.018408000469207764
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,4,4,128,1,float16,fp8,0,0.010360000282526016
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,4,1,128,1,float16,float16,0,0.018681600689888
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,4,4,128,1,fp8,fp8,0,0.010401599854230881
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,4,1,128,1,float16,fp8,0,0.009692800045013428
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,4,1,128,1,fp8,fp8,0,0.008523199707269669
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,4,2,128,1,float16,float16,0,0.018705600500106813
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,4,2,128,1,float16,fp8,0,0.008641599863767623
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,4,2,128,1,fp8,fp8,0,0.008550400286912918
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,4,1,128,1,fp8,fp8,0,0.008553600311279297
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,4,4,128,1,float16,float16,0,0.018592000007629395
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,4,4,128,1,float16,fp8,0,0.009239999949932099
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,4,4,128,1,fp8,fp8,0,0.010435199737548828
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,4,1,128,1,float16,float16,0,0.018606400489807128
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,4,1,128,1,float16,fp8,0,0.008417599648237229
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,4,2,128,1,float16,float16,0,0.017182399332523347
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,4,2,128,1,float16,fp8,0,0.008532799780368805
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,4,2,128,1,fp8,fp8,0,0.009966400265693665
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,4,4,128,1,float16,float16,0,0.018590399622917177
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,4,4,128,1,float16,fp8,0,0.009761600196361542
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,4,4,128,1,fp8,fp8,0,0.008924800157546996
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,4,1,128,1,float16,float16,0,0.016740800440311433
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,4,1,128,1,float16,fp8,0,0.008683200180530547
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,4,1,128,1,fp8,fp8,0,0.008417599648237229
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,4,2,128,1,float16,float16,0,0.01857600063085556
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,4,2,128,1,float16,fp8,0,0.008508799970149994
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,4,2,128,1,fp8,fp8,0,0.010009600222110749
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,4,1,128,1,float16,float16,0,0.04544639885425568
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,4,4,128,1,float16,float16,0,0.03723360002040863
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,4,1,128,1,float16,fp8,0,0.0330592006444931
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,4,4,128,1,float16,fp8,0,0.020670400559902193
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,4,1,128,1,fp8,fp8,0,0.033476799726486206
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,4,1,128,1,float16,float16,0,0.03292160034179688
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,4,2,128,1,float16,float16,0,0.049516800045967105
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,4,2,128,1,float16,fp8,0,0.03301439881324768
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,4,2,128,1,fp8,fp8,0,0.03303520083427429
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,4,4,128,1,fp8,fp8,0,0.020822399854660036
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,4,1,128,1,float16,fp8,0,0.02078240066766739
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,4,1,128,1,fp8,fp8,0,0.0208624005317688
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,4,2,128,1,float16,float16,0,0.033025598526000975
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,4,2,128,1,float16,fp8,0,0.02075359970331192
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,4,2,128,1,fp8,fp8,0,0.020824000239372253
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,4,4,128,1,float16,float16,0,0.02484000027179718
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,4,2,128,1,float16,fp8,0,0.014504000544548035
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,4,4,128,1,float16,fp8,0,0.014547200500965118
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,4,4,128,1,fp8,fp8,0,0.014524799585342408
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,4,1,128,1,float16,float16,0,0.024769599735736846
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,4,1,128,1,float16,fp8,0,0.014532800018787383
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,4,2,128,1,float16,float16,0,0.025222399830818178
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,4,1,128,1,fp8,fp8,0,0.014670400321483612
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,4,4,128,1,float16,float16,0,0.02263839989900589
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,4,2,128,1,fp8,fp8,0,0.014523200690746307
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,4,4,128,1,float16,fp8,0,0.010417599976062775
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,4,4,128,1,fp8,fp8,0,0.01048320010304451
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,4,1,128,1,float16,float16,0,0.020713600516319274
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,4,1,128,1,float16,fp8,0,0.010353600233793258
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,4,1,128,1,fp8,fp8,0,0.01048479974269867
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,4,2,128,1,float16,float16,0,0.022273600101470947
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,4,2,128,1,float16,fp8,0,0.010375999659299851
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,4,2,128,1,fp8,fp8,0,0.010470400005578995
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,4,2,128,1,float16,float16,0,0.018768000602722167
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,4,4,128,1,float16,float16,0,0.019601599872112276
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,4,2,128,1,fp8,fp8,0,0.010377600044012069
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,4,4,128,1,float16,fp8,0,0.010447999835014344
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,4,4,128,1,fp8,fp8,0,0.01045600026845932
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,4,1,128,1,float16,float16,0,0.018662400543689728
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,4,1,128,1,float16,fp8,0,0.01040000021457672
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,4,1,128,1,fp8,fp8,0,0.010558400303125381
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,4,2,128,1,float16,fp8,0,0.010406400263309478
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,4,4,128,1,float16,float16,0,0.018614399433135986
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,4,4,128,1,float16,fp8,0,0.00939520001411438
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,4,4,128,1,fp8,fp8,0,0.008462399989366532
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,4,2,128,1,fp8,fp8,0,0.009292799979448318
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,4,1,128,1,float16,float16,0,0.018593600392341612
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,4,1,128,1,float16,fp8,0,0.008944000303745269
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,4,1,128,1,fp8,fp8,0,0.010344000160694122
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,4,2,128,1,float16,float16,0,0.018673600256443025
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,4,2,128,1,float16,fp8,0,0.009379199892282485
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,4,4,128,1,float16,float16,0,0.018606400489807128
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,4,2,128,1,float16,fp8,0,0.008452799916267396
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,4,4,128,1,fp8,fp8,0,0.008448000252246856
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,4,4,128,1,float16,fp8,0,0.008475200086832047
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,4,1,128,1,float16,float16,0,0.016760000586509706
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,4,1,128,1,float16,fp8,0,0.008495999872684479
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,4,1,128,1,fp8,fp8,0,0.009652800112962722
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,4,2,128,1,float16,float16,0,0.018651199340820313
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,4,1,128,1,fp8,fp8,0,0.009468799829483033
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,4,2,128,1,fp8,fp8,0,0.010196799784898758
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,4,4,128,1,float16,float16,0,0.018620799481868743
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,4,4,128,1,float16,fp8,0,0.008470399677753449
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,4,4,128,1,fp8,fp8,0,0.008463999629020691
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,4,1,128,1,float16,float16,0,0.01666560024023056
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,4,1,128,1,float16,fp8,0,0.008534400165081025
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,4,2,128,1,float16,float16,0,0.01865919977426529
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,4,2,128,1,float16,fp8,0,0.008654399961233138
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,4,2,128,1,fp8,fp8,0,0.010388799756765366
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,4,4,128,1,float16,float16,0,0.0186831995844841
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,4,4,128,1,float16,fp8,0,0.00867839977145195
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,4,4,128,1,fp8,fp8,0,0.008372800052165985
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,4,1,128,1,float16,float16,0,0.01663520038127899
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,4,1,128,1,float16,fp8,0,0.008392000198364257
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,4,1,128,1,fp8,fp8,0,0.008444800227880477
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,4,2,128,1,float16,float16,0,0.018665599822998046
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,4,2,128,1,float16,fp8,0,0.00958240032196045
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,4,2,128,1,fp8,fp8,0,0.008414400368928909
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,2,1,128,1,float16,float16,0,1.0576640129089356
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,2,1,128,1,float16,fp8,0,0.9335807800292969
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,2,2,128,1,float16,fp8,0,0.5305056095123291
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,2,2,128,1,float16,float16,0,0.6742671966552735
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16384,2,1,128,1,fp8,fp8,0,0.9367440223693848
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,2,2,128,1,fp8,fp8,0,0.5300096035003662
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,2,1,128,1,float16,float16,0,0.5926144123077393
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,2,1,128,1,float16,fp8,0,0.5370016098022461
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,2,2,128,1,float16,float16,0,0.3995743989944458
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,2,1,128,1,fp8,fp8,0,0.530027198791504
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,2,1,128,1,float16,fp8,0,0.3268064022064209
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,2,2,128,1,float16,fp8,0,0.3284048080444336
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,2,2,128,1,float16,float16,0,0.23892319202423096
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,2,2,128,1,fp8,fp8,0,0.32953600883483886
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,2,1,128,1,float16,float16,0,0.348908805847168
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,2,1,128,1,fp8,fp8,0,0.32815520763397216
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,2,1,128,1,float16,fp8,0,0.20883679389953613
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,2,2,128,1,float16,fp8,0,0.2083199977874756
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,2,2,128,1,fp8,fp8,0,0.20773119926452638
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,2,1,128,1,float16,float16,0,0.2249295949935913
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,2,1,128,1,fp8,fp8,0,0.2100831985473633
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,2,1,128,1,float16,float16,0,0.6611680030822754
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,2,1,128,1,float16,fp8,0,0.5860527992248535
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,12288,2,1,128,1,fp8,fp8,0,0.586678409576416
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,2,2,128,1,float16,float16,0,0.4391280174255371
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,2,2,128,1,float16,fp8,0,0.33907361030578614
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,2,2,128,1,fp8,fp8,0,0.33943519592285154
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,2,1,128,1,float16,float16,0,0.3767647981643677
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,2,1,128,1,float16,fp8,0,0.33809919357299806
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,2,1,128,1,fp8,fp8,0,0.3376287937164307
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,2,2,128,1,float16,float16,0,0.2534048080444336
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,2,1,128,1,float16,fp8,0,0.2141871929168701
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,2,2,128,1,float16,fp8,0,0.21503040790557862
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,2,2,128,1,fp8,fp8,0,0.21401119232177734
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,2,1,128,1,float16,float16,0,0.22779839038848876
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,2,1,128,1,fp8,fp8,0,0.21342079639434813
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,2,1,128,1,fp8,fp8,0,0.1594223976135254
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,2,2,128,1,float16,float16,0,0.18402880430221558
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,2,2,128,1,float16,fp8,0,0.1600592017173767
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,2,2,128,1,fp8,fp8,0,0.15915520191192628
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,2,1,128,1,float16,float16,0,0.1731487989425659
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,2,1,128,1,float16,fp8,0,0.1601871967315674
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,2,2,128,1,float16,fp8,0,0.26345119476318357
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,2,2,128,1,fp8,fp8,0,0.26602559089660643
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,2,1,128,1,float16,float16,0,0.4968751907348633
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,2,1,128,1,float16,fp8,0,0.4411263942718506
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,10240,2,1,128,1,fp8,fp8,0,0.4414224147796631
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,2,2,128,1,float16,float16,0,0.34628639221191404
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,2,1,128,1,float16,float16,0,0.2859247922897339
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,2,1,128,1,float16,fp8,0,0.26434400081634524
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,2,1,128,1,fp8,fp8,0,0.2637279987335205
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,2,2,128,1,float16,float16,0,0.192740797996521
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,2,2,128,1,float16,fp8,0,0.1596959948539734
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,2,2,128,1,fp8,fp8,0,0.15864800214767455
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,2,1,128,1,float16,float16,0,0.17119200229644777
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,2,1,128,1,float16,fp8,0,0.1590656042098999
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,2,1,128,1,fp8,fp8,0,0.15903840065002442
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,2,2,128,1,float16,float16,0,0.15577759742736816
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,2,2,128,1,float16,fp8,0,0.13531359434127807
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,2,2,128,1,fp8,fp8,0,0.13520480394363404
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,2,1,128,1,float16,float16,0,0.14599679708480834
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,2,1,128,1,float16,fp8,0,0.1352944016456604
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,2,1,128,1,fp8,fp8,0,0.1353119969367981
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,2,1,128,1,float16,float16,0,0.636678409576416
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,2,2,128,1,fp8,fp8,0,0.31808640956878664
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,2,1,128,1,float16,fp8,0,0.5644927978515625
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,8192,2,1,128,1,fp8,fp8,0,0.5645423889160156
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,2,2,128,1,float16,float16,0,0.4351151943206787
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,2,2,128,1,float16,fp8,0,0.31890718936920165
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,2,1,128,1,float16,float16,0,0.35622398853302
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,2,1,128,1,float16,fp8,0,0.31601920127868655
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,2,1,128,1,fp8,fp8,0,0.315012788772583
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,2,2,128,1,float16,float16,0,0.25503199100494384
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,2,2,128,1,float16,fp8,0,0.19339359998703004
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,2,2,128,1,fp8,fp8,0,0.1934928059577942
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,2,1,128,1,float16,float16,0,0.20704638957977295
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,2,1,128,1,float16,fp8,0,0.1951151967048645
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,2,1,128,1,fp8,fp8,0,0.19374719858169556
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,2,2,128,1,float16,float16,0,0.14377440214157106
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,2,2,128,1,float16,fp8,0,0.11998560428619384
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,2,2,128,1,fp8,fp8,0,0.12095520496368409
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,2,1,128,1,float16,float16,0,0.1303328037261963
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,2,1,128,1,float16,fp8,0,0.11995840072631836
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,2,1,128,1,fp8,fp8,0,0.11991039514541627
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,2,1,128,1,fp8,fp8,0,0.1106160044670105
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,2,2,128,1,float16,float16,0,0.1277583956718445
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,2,2,128,1,float16,fp8,0,0.1107983946800232
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,2,2,128,1,fp8,fp8,0,0.11073919534683227
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,2,1,128,1,float16,float16,0,0.12127840518951416
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,2,1,128,1,float16,fp8,0,0.11083840131759644
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,2,1,128,1,float16,float16,0,0.4141456127166748
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,2,1,128,1,float16,fp8,0,0.365775990486145
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,6144,2,1,128,1,fp8,fp8,0,0.36604158878326415
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,2,2,128,1,float16,float16,0,0.2983551979064941
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,2,2,128,1,float16,fp8,0,0.20868000984191895
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,2,2,128,1,fp8,fp8,0,0.20777440071105957
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,2,1,128,1,float16,float16,0,0.2309999942779541
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,2,1,128,1,float16,fp8,0,0.2086944103240967
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,2,1,128,1,fp8,fp8,0,0.20759520530700684
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,2,2,128,1,float16,float16,0,0.16495360136032106
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,2,2,128,1,float16,float16,0,0.11269439458847046
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,2,2,128,1,float16,fp8,0,0.12961599826812745
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,2,2,128,1,fp8,fp8,0,0.1299839973449707
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,2,2,128,1,fp8,fp8,0,0.09242079854011535
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,2,1,128,1,float16,float16,0,0.14120639562606813
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,2,1,128,1,float16,fp8,0,0.1303056001663208
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,2,1,128,1,fp8,fp8,0,0.1293328046798706
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,2,2,128,1,float16,fp8,0,0.09265279769897461
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,2,1,128,1,float16,float16,0,0.10304960012435913
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,2,1,128,1,float16,fp8,0,0.09244800209999085
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,2,1,128,1,fp8,fp8,0,0.09243040084838867
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,2,2,128,1,float16,float16,0,0.10034879446029663
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,2,1,128,1,fp8,fp8,0,0.08539680242538453
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,2,2,128,1,float16,fp8,0,0.08585280179977417
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,2,2,128,1,fp8,fp8,0,0.08422719836235046
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,2,1,128,1,float16,float16,0,0.09633119702339173
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,2,1,128,1,float16,fp8,0,0.08614720106124878
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,2,1,128,1,float16,float16,0,0.4280816078186035
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,2,1,128,1,float16,fp8,0,0.37457120418548584
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,4096,2,1,128,1,fp8,fp8,0,0.3771536111831665
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,2,2,128,1,float16,float16,0,0.3189791917800903
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,2,2,128,1,float16,fp8,0,0.20739359855651857
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,2,2,128,1,fp8,fp8,0,0.20821280479431153
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,2,1,128,1,float16,float16,0,0.23863840103149414
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,2,1,128,1,float16,fp8,0,0.20657761096954347
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,2,1,128,1,fp8,fp8,0,0.20563039779663086
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,2,2,128,1,float16,float16,0,0.18095200061798095
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,2,2,128,1,float16,fp8,0,0.12405120134353638
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,2,2,128,1,fp8,fp8,0,0.12352479696273803
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,2,2,128,1,float16,fp8,0,0.07509599924087525
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,2,1,128,1,float16,float16,0,0.13492159843444823
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,2,1,128,1,float16,fp8,0,0.12381119728088379
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,2,1,128,1,fp8,fp8,0,0.12379200458526611
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,2,2,128,1,float16,float16,0,0.09587839841842652
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,2,2,128,1,fp8,fp8,0,0.06571199893951415
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,2,2,128,1,fp8,fp8,0,0.07427520155906678
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,2,1,128,1,float16,float16,0,0.08255199790000915
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,2,1,128,1,float16,fp8,0,0.07420160174369812
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,2,1,128,1,fp8,fp8,0,0.07472000122070313
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,2,2,128,1,float16,float16,0,0.08026080131530762
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,2,2,128,1,float16,fp8,0,0.06521919965744019
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,2,1,128,1,float16,float16,0,0.0739471971988678
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,2,1,128,1,float16,fp8,0,0.0657584011554718
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,2,1,128,1,fp8,fp8,0,0.06497920155525208
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,2,2,128,1,float16,float16,0,0.0725488007068634
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,2,2,128,1,float16,fp8,0,0.05976639986038208
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,2,2,128,1,fp8,fp8,0,0.05995839834213257
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,2,1,128,1,float16,float16,0,0.06814720034599304
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,2,1,128,1,float16,fp8,0,0.05964959859848022
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,2,1,128,1,fp8,fp8,0,0.059601598978042604
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,2,1,128,1,float16,float16,0,0.29281599521636964
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,2,1,128,1,float16,fp8,0,0.2528640031814575
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,3072,2,1,128,1,fp8,fp8,0,0.25256800651550293
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,2,2,128,1,float16,float16,0,0.22758560180664061
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,2,2,128,1,float16,fp8,0,0.14318560361862182
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,2,2,128,1,fp8,fp8,0,0.14199039936065674
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,2,1,128,1,float16,float16,0,0.16435359716415404
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,2,1,128,1,float16,fp8,0,0.14184800386428834
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,2,1,128,1,fp8,fp8,0,0.141867196559906
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,2,2,128,1,float16,fp8,0,0.08616799712181092
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,2,2,128,1,float16,float16,0,0.12537920475006104
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,2,2,128,1,fp8,fp8,0,0.08626719713211059
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,2,1,128,1,float16,float16,0,0.09590880274772644
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,2,1,128,1,float16,fp8,0,0.08640959858894348
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,2,1,128,1,fp8,fp8,0,0.08627840280532836
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,2,2,128,1,float16,float16,0,0.0777296006679535
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,2,2,128,1,float16,fp8,0,0.057899200916290285
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,2,2,128,1,fp8,fp8,0,0.0576479971408844
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,2,1,128,1,float16,float16,0,0.0667743980884552
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,2,1,128,1,float16,fp8,0,0.057608002424240114
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,2,2,128,1,float16,float16,0,0.06589120030403137
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,2,1,128,1,fp8,fp8,0,0.05762240290641785
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,2,2,128,1,float16,fp8,0,0.051444798707962036
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,2,2,128,1,fp8,fp8,0,0.05133919715881348
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,2,1,128,1,float16,float16,0,0.05999839901924133
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,2,1,128,1,float16,fp8,0,0.051395201683044435
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,2,1,128,1,fp8,fp8,0,0.05134879946708679
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,2,2,128,1,float16,float16,0,0.05963839888572693
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,2,2,128,1,float16,fp8,0,0.04740320146083832
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,2,2,128,1,fp8,fp8,0,0.04738079905509949
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,2,1,128,1,float16,float16,0,0.05689120292663574
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,2,1,128,1,float16,fp8,0,0.04734399914741516
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,2,1,128,1,fp8,fp8,0,0.04731679856777191
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,2,1,128,1,float16,float16,0,0.32202560901641847
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,2,1,128,1,float16,fp8,0,0.27797279357910154
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,2048,2,1,128,1,fp8,fp8,0,0.27700319290161135
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,2,2,128,1,float16,float16,0,0.2582751989364624
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,2,2,128,1,float16,fp8,0,0.1519551992416382
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,2,2,128,1,fp8,fp8,0,0.15215840339660644
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,2,1,128,1,float16,float16,0,0.18044960498809814
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,2,1,128,1,float16,float16,0,0.09633920192718506
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,2,1,128,1,float16,fp8,0,0.08745120167732238
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,2,1,128,1,float16,fp8,0,0.15002399682998657
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,2,1,128,1,fp8,fp8,0,0.15017919540405272
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,2,2,128,1,float16,float16,0,0.14521119594573975
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,2,2,128,1,float16,fp8,0,0.08629599809646607
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,2,2,128,1,fp8,fp8,0,0.086844801902771
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,2,1,128,1,fp8,fp8,0,0.08646240234375
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,2,2,128,1,float16,float16,0,0.07288479804992676
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,2,2,128,1,float16,fp8,0,0.051472002267837526
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,2,2,128,1,fp8,fp8,0,0.0514735996723175
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,2,1,128,1,float16,float16,0,0.05829280018806458
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,2,1,128,1,float16,fp8,0,0.05134559869766235
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,2,1,128,1,fp8,fp8,0,0.05135520100593567
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,2,2,128,1,float16,float16,0,0.055667197704315184
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,2,2,128,1,float16,fp8,0,0.041289600729942325
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,2,2,128,1,fp8,fp8,0,0.041331198811531064
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,2,1,128,1,float16,float16,0,0.05018240213394165
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,2,1,128,1,float16,fp8,0,0.04116480052471161
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,2,1,128,1,fp8,fp8,0,0.04119519889354706
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,2,2,128,1,float16,float16,0,0.049414399266242984
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,2,2,128,1,float16,fp8,0,0.03711360096931458
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,2,2,128,1,fp8,fp8,0,0.037196800112724304
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,2,1,128,1,float16,float16,0,0.04519999921321869
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,2,1,128,1,float16,fp8,0,0.03748959898948669
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,2,1,128,1,fp8,fp8,0,0.037201601266860965
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,2,2,128,1,float16,float16,0,0.045278400182724
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,2,2,128,1,float16,fp8,0,0.03510720133781433
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,2,2,128,1,fp8,fp8,0,0.034985598921775815
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,2,1,128,1,float16,float16,0,0.044715198874473575
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,2,1,128,1,float16,fp8,0,0.03497759997844696
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,2,1,128,1,fp8,fp8,0,0.03507519960403442
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,2,1,128,1,float16,float16,0,0.2307408094406128
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,2,1,128,1,float16,fp8,0,0.19515999555587768
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1536,2,1,128,1,fp8,fp8,0,0.1949936032295227
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,2,1,128,1,float16,fp8,0,0.10681439638137817
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,2,2,128,1,float16,float16,0,0.19090720415115356
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,2,2,128,1,float16,fp8,0,0.10673279762268066
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,2,2,128,1,fp8,fp8,0,0.10786240100860596
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,2,1,128,1,float16,float16,0,0.12839839458465577
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,2,1,128,1,fp8,fp8,0,0.1067039966583252
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,2,2,128,1,float16,float16,0,0.09444640278816223
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,2,2,128,1,float16,fp8,0,0.06359840035438538
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,2,2,128,1,fp8,fp8,0,0.06369600296020508
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,2,1,128,1,float16,float16,0,0.07078880071640015
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,2,1,128,1,float16,fp8,0,0.06354879736900329
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,2,1,128,1,fp8,fp8,0,0.06355519890785218
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,2,2,128,1,float16,float16,0,0.059355199337005615
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,2,2,128,1,float16,fp8,0,0.04110719859600067
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,2,2,128,1,fp8,fp8,0,0.041115200519561766
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,2,1,128,1,float16,float16,0,0.04947839975357056
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,2,1,128,1,float16,fp8,0,0.04113599956035614
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,2,1,128,1,fp8,fp8,0,0.04110400080680847
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,2,2,128,1,float16,float16,0,0.04755359888076782
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,2,2,128,1,float16,fp8,0,0.0330159991979599
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,2,2,128,1,fp8,fp8,0,0.033129599690437314
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,2,1,128,1,float16,float16,0,0.04325439929962158
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,2,1,128,1,float16,fp8,0,0.03310079872608185
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,2,1,128,1,fp8,fp8,0,0.03367359936237335
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,2,2,128,1,float16,float16,0,0.04182400107383728
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,2,2,128,1,float16,fp8,0,0.030851200222969055
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,2,2,128,1,fp8,fp8,0,0.031011199951171874
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,2,1,128,1,float16,float16,0,0.039182400703430174
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,2,1,128,1,float16,fp8,0,0.030862399935722352
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,2,1,128,1,fp8,fp8,0,0.030937600135803222
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,2,2,128,1,float16,float16,0,0.03905600011348724
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,2,2,128,1,float16,fp8,0,0.028908801078796387
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,2,1,128,1,float16,float16,0,0.2706496000289917
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,2,1,128,1,float16,fp8,0,0.22848000526428222
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,2,2,128,1,fp8,fp8,0,0.028865599632263185
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,2,1,128,1,float16,float16,0,0.03914079964160919
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,2,1,128,1,float16,fp8,0,0.028839999437332155
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,2,1,128,1,fp8,fp8,0,0.028825598955154418
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,2,2,128,1,float16,fp8,0,0.12327040433883667
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,1024,2,1,128,1,fp8,fp8,0,0.2280911922454834
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,2,2,128,1,float16,float16,0,0.2274768114089966
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,2,2,128,1,fp8,fp8,0,0.12309600114822387
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,2,1,128,1,float16,float16,0,0.14953279495239258
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,2,1,128,1,float16,fp8,0,0.12123520374298095
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,2,1,128,1,fp8,fp8,0,0.1211967945098877
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,2,2,128,1,float16,float16,0,0.1237104058265686
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,2,2,128,1,float16,fp8,0,0.06845600008964539
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,2,2,128,1,fp8,fp8,0,0.06803839802742004
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,2,1,128,1,float16,float16,0,0.07486879825592041
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,2,1,128,1,float16,fp8,0,0.06796640157699585
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,2,1,128,1,fp8,fp8,0,0.06788319945335389
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,2,2,128,1,float16,float16,0,0.05987039804458618
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,2,2,128,1,float16,fp8,0,0.03919360041618347
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,2,2,128,1,fp8,fp8,0,0.03925440013408661
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,2,1,128,1,float16,float16,0,0.04623199999332428
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,2,1,128,1,float16,fp8,0,0.039059200882911684
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,2,1,128,1,fp8,fp8,0,0.039263999462127684
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,2,2,128,1,float16,float16,0,0.04518559873104096
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,2,2,128,1,float16,fp8,0,0.030772799253463747
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,2,2,128,1,fp8,fp8,0,0.03054879903793335
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,2,1,128,1,float16,float16,0,0.03775840103626251
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,2,1,128,1,fp8,fp8,0,0.030033600330352784
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,2,1,128,1,float16,fp8,0,0.030926400423049928
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,2,2,128,1,float16,float16,0,0.03824639916419983
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,2,2,128,1,float16,fp8,0,0.024780799448490144
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,2,2,128,1,fp8,fp8,0,0.024817599356174468
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,2,1,128,1,float16,float16,0,0.033232000470161435
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,2,1,128,1,float16,fp8,0,0.02496480047702789
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,2,2,128,1,float16,float16,0,0.03299359977245331
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,2,1,128,1,fp8,fp8,0,0.02508159875869751
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,2,2,128,1,float16,fp8,0,0.02288320064544678
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,2,2,128,1,fp8,fp8,0,0.022759999334812164
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,2,1,128,1,float16,float16,0,0.03307519853115082
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,2,1,128,1,float16,fp8,0,0.022942399978637694
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,2,1,128,1,fp8,fp8,0,0.022815999388694764
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,2,2,128,1,float16,float16,0,0.03301919996738434
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,2,2,128,1,float16,fp8,0,0.02280000001192093
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,2,2,128,1,fp8,fp8,0,0.022681599855422972
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,2,1,128,1,float16,float16,0,0.03185920119285583
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,2,1,128,1,float16,fp8,0,0.02279680073261261
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,2,1,128,1,fp8,fp8,0,0.022752000391483305
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,2,1,128,1,float16,float16,0,0.24543519020080568
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,2,1,128,1,float16,fp8,0,0.20347681045532226
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,512,2,1,128,1,fp8,fp8,0,0.20488638877868653
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,2,1,128,1,float16,fp8,0,0.10719200372695922
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,2,2,128,1,float16,float16,0,0.2130064010620117
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,2,1,128,1,fp8,fp8,0,0.10688639879226684
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,2,2,128,1,float16,fp8,0,0.10882079601287842
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,2,2,128,1,fp8,fp8,0,0.0596560001373291
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,2,2,128,1,fp8,fp8,0,0.10848640203475952
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,2,1,128,1,float16,float16,0,0.13525279760360717
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,2,2,128,1,float16,fp8,0,0.05960639715194702
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,2,2,128,1,float16,float16,0,0.1137887954711914
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,2,1,128,1,float16,float16,0,0.06599360108375549
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,2,1,128,1,float16,fp8,0,0.05960800051689148
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,2,1,128,1,fp8,fp8,0,0.059646397829055786
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,2,1,128,1,fp8,fp8,0,0.033020800352096556
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,2,2,128,1,float16,float16,0,0.053579199314117434
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,2,2,128,1,float16,fp8,0,0.03352159857749939
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,2,2,128,1,fp8,fp8,0,0.03342080116271973
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,2,1,128,1,float16,float16,0,0.04120799899101257
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,2,1,128,1,float16,fp8,0,0.03300960063934326
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,2,2,128,1,float16,float16,0,0.03889760076999664
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,2,2,128,1,float16,fp8,0,0.02486719936132431
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,2,2,128,1,fp8,fp8,0,0.02481440007686615
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,2,1,128,1,float16,float16,0,0.03312320113182068
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,2,1,128,1,float16,fp8,0,0.02479359954595566
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,2,1,128,1,fp8,fp8,0,0.024825599789619446
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,2,2,128,1,float16,float16,0,0.03128960132598877
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,2,2,128,1,float16,fp8,0,0.018833599984645844
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,2,2,128,1,fp8,fp8,0,0.020641599595546723
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,2,1,128,1,float16,float16,0,0.028519999980926514
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,2,1,128,1,float16,fp8,0,0.01924159973859787
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,2,1,128,1,fp8,fp8,0,0.020670400559902193
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,2,2,128,1,float16,float16,0,0.026943999528884887
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,2,2,128,1,float16,fp8,0,0.016752000153064727
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,2,2,128,1,fp8,fp8,0,0.016732800006866454
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,2,2,128,1,fp8,fp8,0,0.017752000689506532
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,2,1,128,1,float16,float16,0,0.02677919864654541
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,2,1,128,1,float16,fp8,0,0.017467199265956877
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,2,1,128,1,fp8,fp8,0,0.018479999899864197
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,2,2,128,1,float16,float16,0,0.026870399713516235
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,2,2,128,1,float16,fp8,0,0.0166143998503685
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,2,1,128,1,float16,float16,0,0.026782399415969847
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,2,1,128,1,float16,fp8,0,0.01659359931945801
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,2,1,128,1,fp8,fp8,0,0.01666239947080612
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,2,2,128,1,float16,float16,0,0.02508319914340973
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,2,1,128,1,float16,float16,0,0.12702399492263794
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,2,2,128,1,float16,fp8,0,0.016659200191497803
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,2,2,128,1,fp8,fp8,0,0.016603200137615202
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,2,1,128,1,float16,float16,0,0.02686080038547516
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,2,1,128,1,float16,fp8,0,0.01652960032224655
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,2,1,128,1,fp8,fp8,0,0.01658719927072525
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,2,1,128,1,float16,fp8,0,0.09951040148735046
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,256,2,1,128,1,fp8,fp8,0,0.09893760085105896
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,2,2,128,1,float16,float16,0,0.10721280574798583
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,2,2,128,1,float16,fp8,0,0.05350720286369324
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,2,2,128,1,fp8,fp8,0,0.05342239737510681
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,2,1,128,1,float16,float16,0,0.05968800187110901
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,2,1,128,1,float16,fp8,0,0.05341119766235351
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,2,1,128,1,fp8,fp8,0,0.05355039834976196
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,2,2,128,1,float16,float16,0,0.05134080052375793
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,2,2,128,1,float16,fp8,0,0.030913600325584413
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,2,2,128,1,fp8,fp8,0,0.030895999073982237
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,2,1,128,1,float16,float16,0,0.03721120059490204
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,2,1,128,1,float16,fp8,0,0.03091840147972107
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,2,1,128,1,fp8,fp8,0,0.031009599566459656
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,2,2,128,1,float16,float16,0,0.035262399911880495
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,2,2,128,1,float16,fp8,0,0.021076799929142
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,2,2,128,1,fp8,fp8,0,0.02271520048379898
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,2,1,128,1,float16,float16,0,0.029123198986053467
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,2,1,128,1,float16,fp8,0,0.020875200629234314
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,2,1,128,1,fp8,fp8,0,0.020688000321388244
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,2,2,128,1,float16,float16,0,0.028908801078796387
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,2,2,128,1,float16,fp8,0,0.01658879965543747
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,2,2,128,1,fp8,fp8,0,0.016627199947834015
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,2,1,128,1,float16,float16,0,0.02478239983320236
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,2,1,128,1,float16,fp8,0,0.016649599373340606
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,2,1,128,1,fp8,fp8,0,0.016542400419712066
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,2,2,128,1,float16,float16,0,0.02476480007171631
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,2,2,128,1,float16,float16,0,0.02478879988193512
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,2,2,128,1,float16,fp8,0,0.014689600467681885
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,2,2,128,1,fp8,fp8,0,0.014580799639225006
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,2,1,128,1,float16,float16,0,0.024769599735736846
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,2,1,128,1,float16,fp8,0,0.014555199444293976
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,2,1,128,1,fp8,fp8,0,0.01451680064201355
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,2,2,128,1,float16,fp8,0,0.013491199910640716
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,2,2,128,1,fp8,fp8,0,0.014452800154685974
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,2,1,128,1,float16,float16,0,0.022843199968338012
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,2,1,128,1,float16,fp8,0,0.014478400349617004
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,2,1,128,1,fp8,fp8,0,0.014294399321079254
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,2,2,128,1,float16,float16,0,0.024716800451278685
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,2,2,128,1,float16,fp8,0,0.013334399461746216
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,2,2,128,1,fp8,fp8,0,0.012643200159072877
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,2,1,128,1,float16,float16,0,0.02268960028886795
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,2,1,128,1,float16,fp8,0,0.012644800543785095
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,2,1,128,1,fp8,fp8,0,0.012627199292182922
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,2,2,128,1,float16,float16,0,0.022758400440216063
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,2,2,128,1,float16,fp8,0,0.01252799928188324
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,2,2,128,1,fp8,fp8,0,0.012774400413036346
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,2,1,128,1,float16,float16,0,0.02274399995803833
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,2,1,128,1,float16,fp8,0,0.012579199671745301
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,2,1,128,1,fp8,fp8,0,0.01255200058221817
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,2,1,128,1,float16,float16,0,0.06456639766693115
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,2,1,128,1,fp8,fp8,0,0.05549600124359131
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,128,2,1,128,1,float16,fp8,0,0.05556640028953552
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,2,2,128,1,float16,float16,0,0.051769602298736575
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,2,2,128,1,float16,fp8,0,0.03105440139770508
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,2,2,128,1,fp8,fp8,0,0.030955201387405394
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,2,1,128,1,float16,fp8,0,0.030990400910377504
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,2,1,128,1,float16,float16,0,0.03914560079574585
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,2,1,128,1,float16,fp8,0,0.01866399943828583
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,2,1,128,1,fp8,fp8,0,0.03089439868927002
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,2,2,128,1,float16,float16,0,0.03394719958305359
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,2,2,128,1,float16,fp8,0,0.014681600034236908
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,2,2,128,1,float16,fp8,0,0.018779200315475465
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,2,2,128,1,fp8,fp8,0,0.018654400110244752
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,2,1,128,1,float16,float16,0,0.027289599180221558
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,2,1,128,1,fp8,fp8,0,0.018769599497318268
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,2,2,128,1,float16,float16,0,0.026976001262664796
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,2,2,128,1,fp8,fp8,0,0.014553600549697876
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,2,1,128,1,float16,float16,0,0.022703999280929567
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,2,1,128,1,float16,float16,0,0.020720000565052032
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,2,1,128,1,float16,fp8,0,0.014697599411010741
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,2,1,128,1,fp8,fp8,0,0.014486399292945863
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,2,2,128,1,float16,float16,0,0.020670400559902193
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,2,2,128,1,float16,fp8,0,0.012468799948692322
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,2,2,128,1,fp8,fp8,0,0.010608000308275222
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,2,2,128,1,fp8,fp8,0,0.012479999661445617
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,2,1,128,1,float16,fp8,0,0.012436799705028534
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,2,1,128,1,fp8,fp8,0,0.012411200255155564
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,2,2,128,1,float16,float16,0,0.020755200088024138
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,2,2,128,1,float16,fp8,0,0.010592000186443329
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,2,1,128,1,float16,float16,0,0.021223999559879303
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,2,1,128,1,float16,fp8,0,0.010659199953079224
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,2,1,128,1,float16,fp8,0,0.010599999874830245
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,2,1,128,1,fp8,fp8,0,0.01069599986076355
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,2,2,128,1,float16,fp8,0,0.010406400263309478
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,2,2,128,1,float16,float16,0,0.0207056000828743
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,2,2,128,1,fp8,fp8,0,0.010412800312042236
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,2,2,128,1,float16,fp8,0,0.010596799850463866
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,2,1,128,1,float16,float16,0,0.0207056000828743
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,2,1,128,1,fp8,fp8,0,0.010582400113344192
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,2,2,128,1,float16,float16,0,0.020596800744533537
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,2,2,128,1,fp8,fp8,0,0.010462400317192078
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,2,1,128,1,float16,float16,0,0.018779200315475465
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,2,1,128,1,float16,fp8,0,0.010494399815797806
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,2,1,128,1,fp8,fp8,0,0.010375999659299851
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,2,2,128,1,float16,float16,0,0.018617600202560425
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,2,2,128,1,float16,fp8,0,0.010497599840164185
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,2,2,128,1,fp8,fp8,0,0.010467199981212616
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,2,1,128,1,float16,float16,0,0.01865919977426529
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,2,1,128,1,float16,fp8,0,0.010446400195360184
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,2,1,128,1,fp8,fp8,0,0.010358399897813796
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,2,1,128,1,float16,float16,0,0.04532159864902496
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,2,1,128,1,float16,fp8,0,0.03720960021018982
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,64,2,1,128,1,fp8,fp8,0,0.03715200126171112
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,2,2,128,1,float16,float16,0,0.03717440068721771
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,2,2,128,1,float16,fp8,0,0.02276639938354492
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,2,2,128,1,fp8,fp8,0,0.02280000001192093
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,2,1,128,1,float16,float16,0,0.03173600137233734
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,2,1,128,1,float16,fp8,0,0.02269120067358017
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,2,1,128,1,fp8,fp8,0,0.02268480062484741
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,2,2,128,1,float16,float16,0,0.027369600534439088
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,2,2,128,1,float16,fp8,0,0.014532800018787383
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,2,2,128,1,fp8,fp8,0,0.014612799882888794
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,2,1,128,1,float16,float16,0,0.022675199806690215
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,2,1,128,1,float16,fp8,0,0.012529599666595458
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,2,1,128,1,float16,fp8,0,0.01446399986743927
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,2,1,128,1,fp8,fp8,0,0.014640000462532044
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,2,2,128,1,float16,float16,0,0.02072799950838089
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,2,2,128,1,float16,fp8,0,0.012636800110340119
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,2,2,128,1,fp8,fp8,0,0.01242400035262108
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,2,1,128,1,float16,float16,0,0.020640000700950623
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,2,1,128,1,fp8,fp8,0,0.012353599816560746
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,2,2,128,1,float16,float16,0,0.020713600516319274
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,2,2,128,1,float16,fp8,0,0.010463999956846238
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,2,2,128,1,fp8,fp8,0,0.010489600151777268
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,2,1,128,1,float16,float16,0,0.02075680047273636
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,2,1,128,1,float16,fp8,0,0.010465600341558457
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,2,1,128,1,fp8,fp8,0,0.01048479974269867
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,2,2,128,1,float16,float16,0,0.01974239945411682
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,2,2,128,1,float16,fp8,0,0.010372799634933472
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,2,2,128,1,fp8,fp8,0,0.010478399693965912
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,2,1,128,1,float16,float16,0,0.020638400316238405
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,2,1,128,1,float16,fp8,0,0.010411199927330018
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,2,1,128,1,fp8,fp8,0,0.010443200170993806
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,2,2,128,1,float16,float16,0,0.0193792000412941
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,2,2,128,1,float16,fp8,0,0.01040000021457672
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,2,2,128,1,fp8,fp8,0,0.009652800112962722
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,2,1,128,1,float16,float16,0,0.020531199872493744
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,2,1,128,1,float16,fp8,0,0.010425599664449692
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,2,1,128,1,fp8,fp8,0,0.010363200306892395
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,2,2,128,1,float16,float16,0,0.018592000007629395
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,2,2,128,1,float16,fp8,0,0.010289599746465683
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,2,2,128,1,float16,fp8,0,0.010395199805498124
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,2,2,128,1,fp8,fp8,0,0.010438399761915207
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,2,1,128,1,float16,float16,0,0.018595199286937713
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,2,1,128,1,float16,fp8,0,0.008436799794435502
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,2,1,128,1,fp8,fp8,0,0.010363200306892395
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,2,2,128,1,float16,float16,0,0.018688000738620758
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,2,2,128,1,fp8,fp8,0,0.008675199747085572
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,2,1,128,1,float16,float16,0,0.01857759952545166
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,2,1,128,1,float16,fp8,0,0.00955199971795082
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,2,1,128,1,fp8,fp8,0,0.008427199721336365
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,2,1,128,1,float16,float16,0,0.03777439892292023
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,2,1,128,1,float16,fp8,0,0.026848000288009644
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,32,2,1,128,1,fp8,fp8,0,0.026894399523735048
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,2,2,128,1,float16,float16,0,0.031027200818061828
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,2,2,128,1,float16,fp8,0,0.016676799952983858
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,2,2,128,1,float16,fp8,0,0.012566399574279786
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,2,2,128,1,fp8,fp8,0,0.016595199704170227
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,2,2,128,1,fp8,fp8,0,0.012542399764060973
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,2,1,128,1,float16,float16,0,0.021878400444984437
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,2,1,128,1,float16,fp8,0,0.016663999855518342
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,2,1,128,1,float16,float16,0,0.025961598753929137
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,2,1,128,1,fp8,fp8,0,0.016542400419712066
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,2,2,128,1,float16,float16,0,0.02287199944257736
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,2,1,128,1,float16,fp8,0,0.012468799948692322
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,2,1,128,1,fp8,fp8,0,0.012438400089740754
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,2,2,128,1,float16,float16,0,0.020854400098323823
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,2,2,128,1,float16,float16,0,0.02067199945449829
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,2,2,128,1,float16,fp8,0,0.010425599664449692
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,2,2,128,1,float16,fp8,0,0.010441599786281586
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,2,2,128,1,fp8,fp8,0,0.010475199669599533
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,2,1,128,1,float16,float16,0,0.020771199464797975
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,2,1,128,1,float16,fp8,0,0.010523200035095215
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,2,1,128,1,fp8,fp8,0,0.010503999888896942
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,2,2,128,1,float16,fp8,0,0.010761599987745285
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,2,2,128,1,fp8,fp8,0,0.010377600044012069
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,2,1,128,1,float16,float16,0,0.020628799498081208
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,2,1,128,1,float16,fp8,0,0.01029599979519844
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,2,1,128,1,fp8,fp8,0,0.010377600044012069
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,2,2,128,1,float16,float16,0,0.01879040002822876
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,2,2,128,1,fp8,fp8,0,0.009670399874448777
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,2,1,128,1,float16,float16,0,0.01873279958963394
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,2,1,128,1,float16,fp8,0,0.010422399640083313
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,2,1,128,1,float16,fp8,0,0.009487999975681305
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,2,1,128,1,fp8,fp8,0,0.00926399976015091
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,2,2,128,1,float16,float16,0,0.018587200343608855
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,2,2,128,1,float16,fp8,0,0.00843999981880188
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,2,2,128,1,fp8,fp8,0,0.00915839970111847
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,2,1,128,1,float16,float16,0,0.01858240067958832
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,2,1,128,1,fp8,fp8,0,0.009569600224494934
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,2,1,128,1,fp8,fp8,0,0.008449599891901017
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,2,2,128,1,float16,fp8,0,0.010334400087594986
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,2,2,128,1,float16,float16,0,0.018563200533390046
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,2,2,128,1,float16,fp8,0,0.00889120027422905
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,2,2,128,1,fp8,fp8,0,0.008478400111198426
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,2,1,128,1,float16,float16,0,0.01868640035390854
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,2,1,128,1,float16,fp8,0,0.008454400300979614
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,2,1,128,1,float16,fp8,0,0.022886399924755097
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,2,2,128,1,float16,float16,0,0.018638400733470915
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,2,1,128,1,fp8,fp8,0,0.022868800163269042
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,2,2,128,1,fp8,fp8,0,0.008452799916267396
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,2,1,128,1,float16,float16,0,0.018675200641155243
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,2,1,128,1,float16,fp8,0,0.00926079973578453
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,2,1,128,1,fp8,fp8,0,0.008551999926567078
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,256,16,2,1,128,1,float16,float16,0,0.0329584002494812
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,2,2,128,1,float16,float16,0,0.025436800718307496
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,2,2,128,1,float16,fp8,0,0.014636799693107605
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,2,2,128,1,fp8,fp8,0,0.014584000408649444
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,2,1,128,1,float16,float16,0,0.02480800002813339
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,2,1,128,1,float16,fp8,0,0.014801600575447082
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,2,1,128,1,fp8,fp8,0,0.014684799313545226
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,2,2,128,1,float16,float16,0,0.021583999693393707
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,2,2,128,1,float16,fp8,0,0.010516799986362457
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,2,2,128,1,fp8,fp8,0,0.010787200182676315
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,2,1,128,1,float16,float16,0,0.020707200467586517
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,2,1,128,1,float16,fp8,0,0.010473600029945374
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,2,1,128,1,fp8,fp8,0,0.010446400195360184
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,2,2,128,1,float16,float16,0,0.018636800348758698
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,2,2,128,1,float16,float16,0,0.020635199546813966
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,2,2,128,1,float16,fp8,0,0.010416000336408614
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,2,2,128,1,fp8,fp8,0,0.0104032002389431
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,2,1,128,1,float16,float16,0,0.02064319998025894
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,2,1,128,1,float16,fp8,0,0.010470400005578995
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,2,1,128,1,fp8,fp8,0,0.010363200306892395
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,2,2,128,1,float16,fp8,0,0.009614399820566177
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,2,2,128,1,fp8,fp8,0,0.010086400061845779
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,2,1,128,1,float16,float16,0,0.01867839992046356
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,2,1,128,1,float16,fp8,0,0.010409600287675857
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,2,1,128,1,fp8,fp8,0,0.008675199747085572
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,2,2,128,1,float16,float16,0,0.017049600183963776
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,2,2,128,1,float16,fp8,0,0.009174399822950364
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,2,2,128,1,fp8,fp8,0,0.008777599781751633
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,2,1,128,1,float16,float16,0,0.018676799535751343
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,2,1,128,1,float16,fp8,0,0.008511999994516373
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,2,1,128,1,fp8,fp8,0,0.009763199836015701
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,2,2,128,1,float16,float16,0,0.018681600689888
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,2,2,128,1,float16,fp8,0,0.008390399813652038
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,2,2,128,1,fp8,fp8,0,0.008604799956083297
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,2,1,128,1,float16,float16,0,0.018779200315475465
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,2,1,128,1,float16,fp8,0,0.008390399813652038
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,2,1,128,1,fp8,fp8,0,0.009513600170612336
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,2,2,128,1,float16,float16,0,0.016956800222396852
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,2,2,128,1,float16,fp8,0,0.010043200105428696
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,2,2,128,1,fp8,fp8,0,0.00843999981880188
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,2,1,128,1,float16,float16,0,0.016676799952983858
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,2,1,128,1,float16,fp8,0,0.00849440023303032
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,2,1,128,1,fp8,fp8,0,0.008441600203514098
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,2,2,128,1,float16,float16,0,0.018670399487018586
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,2,2,128,1,float16,fp8,0,0.008342400193214417
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,2,2,128,1,fp8,fp8,0,0.009889599680900574
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,2,1,128,1,float16,float16,0,0.01860959976911545
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,2,1,128,1,float16,fp8,0,0.008452799916267396
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,2,1,128,1,fp8,fp8,0,0.00843840017914772
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,1,1,128,1,float16,float16,0,0.39859519004821775
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,1,1,128,1,float16,fp8,0,0.3612720012664795
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,1,1,128,1,float16,float16,0,0.23841280937194825
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16384,1,1,128,1,fp8,fp8,0,0.36300320625305177
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,1,1,128,1,float16,fp8,0,0.2256608009338379
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16384,1,1,128,1,fp8,fp8,0,0.22577118873596191
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,1,1,128,1,float16,float16,0,0.22076799869537353
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,1,1,128,1,fp8,fp8,0,0.20724480152130126
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16384,1,1,128,1,float16,fp8,0,0.2069727897644043
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,1,1,128,1,float16,float16,0,0.2556544065475464
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,1,1,128,1,float16,fp8,0,0.23840639591217042
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,12288,1,1,128,1,fp8,fp8,0,0.2388751983642578
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,1,1,128,1,float16,float16,0,0.18393440246582032
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,1,1,128,1,float16,fp8,0,0.17251839637756347
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,1,1,128,1,float16,float16,0,0.1904271960258484
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,12288,1,1,128,1,fp8,fp8,0,0.17258399724960327
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,1,1,128,1,float16,float16,0,0.17093440294265747
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,1,1,128,1,float16,fp8,0,0.15791200399398803
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,12288,1,1,128,1,fp8,fp8,0,0.1582527995109558
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,1,1,128,1,float16,fp8,0,0.17973599433898926
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,1,1,128,1,float16,fp8,0,0.1455664038658142
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,1,1,128,1,fp8,fp8,0,0.1459280014038086
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,1,1,128,1,float16,fp8,0,0.1354207992553711
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,10240,1,1,128,1,float16,float16,0,0.1546560049057007
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,10240,1,1,128,1,fp8,fp8,0,0.18068159818649293
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,1,1,128,1,float16,float16,0,0.143777596950531
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,1,1,128,1,fp8,fp8,0,0.2293855905532837
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,10240,1,1,128,1,fp8,fp8,0,0.13363840579986572
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,1,1,128,1,float16,float16,0,0.2559583902359009
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,8192,1,1,128,1,float16,fp8,0,0.2282320022583008
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,1,1,128,1,float16,float16,0,0.14503519535064696
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,1,1,128,1,float16,float16,0,0.11939040422439576
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,1,1,128,1,float16,fp8,0,0.10886240005493164
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,1,1,128,1,float16,fp8,0,0.13681440353393554
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,8192,1,1,128,1,fp8,fp8,0,0.13785920143127442
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,1,1,128,1,float16,float16,0,0.12743040323257446
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,1,1,128,1,fp8,fp8,0,0.15476800203323365
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,1,1,128,1,float16,fp8,0,0.1190176010131836
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,8192,1,1,128,1,fp8,fp8,0,0.11892960071563721
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,8192,1,1,128,1,fp8,fp8,0,0.10949280261993408
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,1,1,128,1,float16,float16,0,0.16556639671325685
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,6144,1,1,128,1,float16,fp8,0,0.15502079725265502
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,1,1,128,1,float16,float16,0,0.11237920522689819
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,1,1,128,1,float16,fp8,0,0.10469919443130493
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,6144,1,1,128,1,fp8,fp8,0,0.10480159521102905
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,1,1,128,1,float16,float16,0,0.10076960325241088
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,1,1,128,1,float16,fp8,0,0.09161279797554016
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,6144,1,1,128,1,fp8,fp8,0,0.0921999990940094
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,1,1,128,1,float16,float16,0,0.09434880018234253
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,1,1,128,1,float16,fp8,0,0.08422880172729492
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,6144,1,1,128,1,fp8,fp8,0,0.08427519798278808
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,1,1,128,1,float16,float16,0,0.1804159998893738
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,1,1,128,1,float16,fp8,0,0.15797439813613892
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,4096,1,1,128,1,fp8,fp8,0,0.1582368016242981
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,1,1,128,1,float16,float16,0,0.09682080149650574
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,1,1,128,1,float16,fp8,0,0.09052799940109253
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,4096,1,1,128,1,fp8,fp8,0,0.09074239730834961
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,1,1,128,1,float16,float16,0,0.08063520193099975
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,1,1,128,1,float16,fp8,0,0.07390879988670349
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,4096,1,1,128,1,fp8,fp8,0,0.07384799718856812
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,1,1,128,1,float16,float16,0,0.0723904013633728
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,1,1,128,1,float16,fp8,0,0.0637776017189026
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,1,1,128,1,float16,fp8,0,0.11171679496765137
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,4096,1,1,128,1,fp8,fp8,0,0.06451039910316467
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,1,1,128,1,float16,float16,0,0.06777759790420532
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,1,1,128,1,float16,fp8,0,0.05958399772644043
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,4096,1,1,128,1,fp8,fp8,0,0.059614402055740354
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,1,1,128,1,float16,float16,0,0.12033920288085938
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,3072,1,1,128,1,fp8,fp8,0,0.11154240369796753
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,1,1,128,1,float16,float16,0,0.07643679976463318
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,1,1,128,1,float16,fp8,0,0.07182559967041016
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,3072,1,1,128,1,fp8,fp8,0,0.07050560116767883
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,1,1,128,1,float16,float16,0,0.06594240069389343
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,1,1,128,1,float16,fp8,0,0.057574397325515746
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,3072,1,1,128,1,fp8,fp8,0,0.05756800174713135
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,1,1,128,1,float16,float16,0,0.0596560001373291
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,1,1,128,1,float16,float16,0,0.14368640184402465
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,1,1,128,1,float16,fp8,0,0.05137760043144226
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,3072,1,1,128,1,fp8,fp8,0,0.05138880014419556
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,1,1,128,1,float16,float16,0,0.05740640163421631
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,1,1,128,1,float16,fp8,0,0.04727360010147095
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,3072,1,1,128,1,fp8,fp8,0,0.0473471999168396
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,1,1,128,1,float16,fp8,0,0.12107839584350585
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,1,1,128,1,float16,float16,0,0.07269279956817627
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,2048,1,1,128,1,fp8,fp8,0,0.12142080068588257
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,1,1,128,1,float16,fp8,0,0.06786879897117615
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,2048,1,1,128,1,fp8,fp8,0,0.06781759858131409
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,1,1,128,1,float16,float16,0,0.05564640164375305
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,1,1,128,1,float16,fp8,0,0.050329601764678954
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,2048,1,1,128,1,fp8,fp8,0,0.04940159916877747
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,1,1,128,1,float16,float16,0,0.049747198820114136
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,1,1,128,1,float16,fp8,0,0.041206398606300355
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,2048,1,1,128,1,fp8,fp8,0,0.04120799899101257
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,1,1,128,1,float16,float16,0,0.04535520076751709
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,1,1,128,1,float16,fp8,0,0.037195199728012086
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,2048,1,1,128,1,fp8,fp8,0,0.037108799815177916
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,1,1,128,1,float16,float16,0,0.04511519968509674
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,1,1,128,1,float16,fp8,0,0.03505440056324005
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,2048,1,1,128,1,fp8,fp8,0,0.034995201230049136
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,1,1,128,1,float16,float16,0,0.09469760060310364
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,1,1,128,1,float16,fp8,0,0.08828480243682861
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1536,1,1,128,1,fp8,fp8,0,0.08832640051841736
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,1,1,128,1,float16,float16,0,0.059457600116729736
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,1,1,128,1,float16,fp8,0,0.05339840054512024
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1536,1,1,128,1,fp8,fp8,0,0.05350720286369324
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,1,1,128,1,float16,float16,0,0.04797439873218536
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,1,1,128,1,float16,fp8,0,0.03962079882621765
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1536,1,1,128,1,fp8,fp8,0,0.03909280002117157
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,1,1,128,1,float16,float16,0,0.04159359931945801
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,1,1,128,1,float16,float16,0,0.03732320070266724
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,1,1,128,1,float16,fp8,0,0.03312639892101288
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1536,1,1,128,1,fp8,fp8,0,0.033022400736808774
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,1,1,128,1,float16,float16,0,0.03910079896450043
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,1,1,128,1,float16,fp8,0,0.030899199843406677
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1536,1,1,128,1,fp8,fp8,0,0.030955201387405394
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,1,1,128,1,float16,float16,0,0.05996639728546142
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,1,1,128,1,float16,fp8,0,0.028726398944854736
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1536,1,1,128,1,fp8,fp8,0,0.028913599252700806
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,1,1,128,1,float16,float16,0,0.12309759855270386
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,1,1,128,1,float16,fp8,0,0.10267519950866699
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,1024,1,1,128,1,fp8,fp8,0,0.10244959592819214
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,1,1,128,1,float16,fp8,0,0.05554080009460449
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,1024,1,1,128,1,fp8,fp8,0,0.05679519772529602
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,1,1,128,1,float16,fp8,0,0.03907040059566498
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,1,1,128,1,float16,float16,0,0.04485920071601868
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,1024,1,1,128,1,fp8,fp8,0,0.03925760090351105
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,1,1,128,1,float16,float16,0,0.03731519877910614
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,1,1,128,1,float16,fp8,0,0.028939199447631837
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,1024,1,1,128,1,fp8,fp8,0,0.028867200016975403
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,1,1,128,1,float16,float16,0,0.03312000036239624
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,1,1,128,1,float16,fp8,0,0.02497120052576065
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,1024,1,1,128,1,fp8,fp8,0,0.02483679950237274
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,1,1,128,1,float16,float16,0,0.033107200264930726
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,1,1,128,1,float16,fp8,0,0.022777600586414336
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,1024,1,1,128,1,fp8,fp8,0,0.022784000635147093
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,1,1,128,1,float16,float16,0,0.03262239992618561
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,1,1,128,1,float16,fp8,0,0.022652800381183624
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,1024,1,1,128,1,fp8,fp8,0,0.022601599991321563
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,1,1,128,1,float16,float16,0,0.039083200693130496
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,1,1,128,1,float16,float16,0,0.11297760009765626
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,1,1,128,1,fp8,fp8,0,0.09342079758644103
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,512,1,1,128,1,float16,fp8,0,0.09282400012016297
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,1,1,128,1,float16,float16,0,0.053995198011398314
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,1,1,128,1,float16,fp8,0,0.05139679908752441
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,512,1,1,128,1,fp8,fp8,0,0.05136479735374451
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,1,1,128,1,float16,fp8,0,0.032913601398468016
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,512,1,1,128,1,fp8,fp8,0,0.033030399680137636
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,1,1,128,1,float16,float16,0,0.03133119940757752
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,1,1,128,1,float16,fp8,0,0.023820799589157105
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,512,1,1,128,1,fp8,fp8,0,0.023515200614929198
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,1,1,128,1,float16,float16,0,0.02738400101661682
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,1,1,128,1,float16,fp8,0,0.0205935999751091
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,512,1,1,128,1,fp8,fp8,0,0.020611199736595153
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,1,1,128,1,float16,float16,0,0.026948800683021544
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,1,1,128,1,float16,fp8,0,0.016663999855518342
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,512,1,1,128,1,fp8,fp8,0,0.016763199865818024
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,1,1,128,1,float16,float16,0,0.027055999636650084
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,1,1,128,1,float16,fp8,0,0.01656319946050644
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,512,1,1,128,1,fp8,fp8,0,0.016616000235080718
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,1,1,128,1,float16,float16,0,0.026907199621200563
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,1,1,128,1,float16,fp8,0,0.016545599699020384
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,512,1,1,128,1,fp8,fp8,0,0.01650719940662384
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,1,1,128,1,float16,float16,0,0.05138239860534668
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,1,1,128,1,float16,fp8,0,0.047336000204086306
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,1,1,128,1,fp8,fp8,0,0.02085919976234436
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,256,1,1,128,1,fp8,fp8,0,0.04734880030155182
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,1,1,128,1,float16,float16,0,0.03538880050182343
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,1,1,128,1,float16,fp8,0,0.03097760081291199
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,256,1,1,128,1,fp8,fp8,0,0.030972799658775328
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,1,1,128,1,float16,float16,0,0.028921601176261903
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,256,1,1,128,1,float16,fp8,0,0.020819200575351714
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,1,1,128,1,float16,float16,0,0.024820800125598907
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,1,1,128,1,float16,fp8,0,0.01655679941177368
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,256,1,1,128,1,fp8,fp8,0,0.01666560024023056
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,1,1,128,1,float16,float16,0,0.02339999973773956
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,1,1,128,1,float16,fp8,0,0.014595200121402741
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,256,1,1,128,1,fp8,fp8,0,0.01451839953660965
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,1,1,128,1,float16,float16,0,0.022881600260734557
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,1,1,128,1,float16,fp8,0,0.01449120044708252
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,256,1,1,128,1,fp8,fp8,0,0.014529600739479065
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,1,1,128,1,float16,float16,0,0.03310079872608185
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,1,1,128,1,float16,float16,0,0.024700799584388734
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,1,1,128,1,fp8,fp8,0,0.026956799626350402
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,1,1,128,1,float16,fp8,0,0.012547199428081513
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,256,1,1,128,1,fp8,fp8,0,0.012559999525547028
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,1,1,128,1,float16,fp8,0,0.01863519996404648
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,1,1,128,1,float16,float16,0,0.022755199670791627
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,1,1,128,1,float16,fp8,0,0.012464000284671784
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,256,1,1,128,1,fp8,fp8,0,0.01255200058221817
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,128,1,1,128,1,float16,fp8,0,0.026895999908447266
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,1,1,128,1,float16,float16,0,0.026892799139022826
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,128,1,1,128,1,fp8,fp8,0,0.01876319944858551
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,1,1,128,1,float16,float16,0,0.020695999264717102
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,1,1,128,1,float16,fp8,0,0.014481599628925323
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,128,1,1,128,1,fp8,fp8,0,0.014531199634075165
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,1,1,128,1,float16,float16,0,0.021324799954891206
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,1,1,128,1,float16,fp8,0,0.012454400211572647
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,128,1,1,128,1,fp8,fp8,0,0.012462399899959564
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,1,1,128,1,fp8,fp8,0,0.01056319996714592
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,1,1,128,1,float16,float16,0,0.020713600516319274
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,1,1,128,1,float16,float16,0,0.020652799308300017
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,1,1,128,1,float16,fp8,0,0.01055999994277954
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,128,1,1,128,1,fp8,fp8,0,0.01075040027499199
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,1,1,128,1,float16,float16,0,0.02064799964427948
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,128,1,1,128,1,float16,fp8,0,0.01050880029797554
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,1,1,128,1,float16,fp8,0,0.0105103999376297
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,128,1,1,128,1,fp8,fp8,0,0.010494399815797806
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,1,1,128,1,float16,float16,0,0.01871200054883957
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,1,1,128,1,float16,fp8,0,0.010384000092744827
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,128,1,1,128,1,fp8,fp8,0,0.010468800365924836
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,1,1,128,1,float16,float16,0,0.026876801252365114
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,1,1,128,1,float16,fp8,0,0.018705600500106813
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,64,1,1,128,1,fp8,fp8,0,0.018587200343608855
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,1,1,128,1,float16,float16,0,0.020718400180339814
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,1,1,128,1,float16,float16,0,0.020635199546813966
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,1,1,128,1,float16,fp8,0,0.014472000300884247
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,64,1,1,128,1,fp8,fp8,0,0.014452800154685974
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,1,1,128,1,float16,float16,0,0.02083200067281723
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,1,1,128,1,float16,fp8,0,0.011486399918794632
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,64,1,1,128,1,fp8,fp8,0,0.012438400089740754
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,1,1,128,1,float16,fp8,0,0.010627199709415436
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,64,1,1,128,1,fp8,fp8,0,0.010496000200510025
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,1,1,128,1,float16,float16,0,0.020627200603485107
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,1,1,128,1,float16,fp8,0,0.01037440001964569
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,64,1,1,128,1,fp8,fp8,0,0.01043199971318245
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,1,1,128,1,float16,float16,0,0.019144000113010408
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,1,1,128,1,float16,fp8,0,0.010420800000429154
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,64,1,1,128,1,fp8,fp8,0,0.0104032002389431
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,1,1,128,1,float16,float16,0,0.01866399943828583
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,1,1,128,1,float16,fp8,0,0.008575999736785888
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,64,1,1,128,1,fp8,fp8,0,0.010406400263309478
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,1,1,128,1,float16,float16,0,0.016728000342845918
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,1,1,128,1,float16,fp8,0,0.008473599702119828
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,64,1,1,128,1,fp8,fp8,0,0.010401599854230881
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,1,1,128,1,float16,fp8,0,0.014679999649524688
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,1,1,128,1,float16,float16,0,0.022777600586414336
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,32,1,1,128,1,fp8,fp8,0,0.014511999487876893
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,1,1,128,1,float16,float16,0,0.02065120041370392
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,1,1,128,1,float16,fp8,0,0.010425599664449692
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,1,1,128,1,float16,fp8,0,0.01241919994354248
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,32,1,1,128,1,fp8,fp8,0,0.012481600046157837
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,1,1,128,1,float16,float16,0,0.020632000267505647
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,1,1,128,1,float16,fp8,0,0.010452800244092942
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,1,1,128,1,float16,float16,0,0.018785600364208222
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,32,1,1,128,1,fp8,fp8,0,0.010393600165843963
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,1,1,128,1,float16,fp8,0,0.009600000083446502
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,1,1,128,1,float16,float16,0,0.020740799605846405
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,32,1,1,128,1,fp8,fp8,0,0.010382399708032609
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,1,1,128,1,float16,float16,0,0.01863519996404648
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,1,1,128,1,float16,fp8,0,0.010420800000429154
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,32,1,1,128,1,fp8,fp8,0,0.008841600269079208
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,32,1,1,128,1,fp8,fp8,0,0.008515200018882752
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,1,1,128,1,float16,float16,0,0.01855199933052063
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,1,1,128,1,fp8,fp8,0,0.009337600320577621
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,32,1,1,128,1,float16,fp8,0,0.009252800047397614
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,1,1,128,1,float16,fp8,0,0.008611200004816055
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,1,1,128,1,float16,float16,0,0.016599999368190767
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,32,1,1,128,1,fp8,fp8,0,0.009950400143861771
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,1,1,128,1,float16,fp8,0,0.012454400211572647
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,1,1,128,1,float16,float16,0,0.0207056000828743
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,128,16,1,1,128,1,fp8,fp8,0,0.012408000230789185
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,1,1,128,1,float16,float16,0,0.020739200711250304
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,1,1,128,1,float16,fp8,0,0.010553599894046783
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,64,16,1,1,128,1,fp8,fp8,0,0.010692799836397171
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,1,1,128,1,float16,float16,0,0.018673600256443025
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,1,1,128,1,float16,fp8,0,0.008844800293445587
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,1,1,128,1,fp8,fp8,0,0.00979520007967949
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,32,16,1,1,128,1,float16,fp8,0,0.010427200049161912
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,1,1,128,1,float16,float16,0,0.018598400056362152
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,1,1,128,1,float16,fp8,0,0.008984000235795975
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,1,1,128,1,float16,float16,0,0.01656640022993088
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,16,16,1,1,128,1,fp8,fp8,0,0.010367999970912933
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,1,1,128,1,float16,float16,0,0.018723200261592864
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,8,16,1,1,128,1,fp8,fp8,0,0.0087567999958992
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,1,1,128,1,float16,float16,0,0.01868479996919632
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,1,1,128,1,float16,fp8,0,0.00846719965338707
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,4,16,1,1,128,1,fp8,fp8,0,0.00851840004324913
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,1,1,128,1,float16,fp8,0,0.008529599756002426
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,2,16,1,1,128,1,fp8,fp8,0,0.00843840017914772
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,1,1,128,1,float16,float16,0,0.018676799535751343
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,1,1,128,1,float16,fp8,0,0.008367999643087386
SGLang,0.5.6.post2,NVIDIA B200,context_attention,trtllm_mha,1,16,1,1,128,1,fp8,fp8,0,0.008376000076532364
