framework,version,device,op_name,kernel_source,batch_size,isl,num_heads,num_key_value_heads,head_dim,window_size,beam_width,attn_dtype,kv_cache_dtype,step,latency
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,float16,1,0.02626666675011317
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,fp8,1,0.02513599892457326
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,float16,3,0.025594666600227356
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,fp8,3,0.025349333882331848
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,float16,7,0.027119999130566914
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,fp8,7,0.025439999997615814
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,float16,15,0.0272533322374026
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,fp8,15,0.025333332518736523
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,float16,31,0.027141332626342773
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,fp8,31,0.02508266766866048
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,float16,63,0.027141332626342773
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,fp8,63,0.025216000775496166
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,float16,127,0.02717866748571396
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,fp8,127,0.025205334027608235
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,float16,255,0.02553066611289978
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,fp8,255,0.02521066615978877
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,float16,511,0.04045866678158442
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,fp8,511,0.03549866626660029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,float16,1023,0.059994667768478394
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,fp8,1023,0.053818667928377785
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,float16,2047,0.10259200135866801
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,fp8,2047,0.09001599748929341
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,float16,4095,0.18083733320236206
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,64,0,1,float16,fp8,4095,0.15615466237068176
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,float16,1,0.009002666920423508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,fp8,1,0.009370666618148485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,float16,3,0.009136000027259191
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,fp8,3,0.010202666744589806
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,float16,7,0.010645333677530289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,fp8,7,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,float16,15,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,fp8,15,0.01098666712641716
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,float16,31,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,fp8,31,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,float16,63,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,fp8,63,0.01101333275437355
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,float16,127,0.008762666955590248
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,fp8,127,0.010527999450763067
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,float16,255,0.009194666519761086
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,fp8,255,0.009082666908701261
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,float16,511,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,fp8,511,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,float16,1023,0.011157333850860596
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,fp8,1023,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,float16,2047,0.013450667262077332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,fp8,2047,0.013007999708255133
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,float16,4095,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,fp8,4095,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,float16,8191,0.021375998854637146
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,fp8,8191,0.019317333896954853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,float16,16383,0.03602133442958196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,float16,1,0.009695999945203463
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,fp8,1,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,float16,3,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,fp8,3,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,float16,7,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,fp8,7,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,float16,15,0.010949333508809408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,fp8,15,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,float16,31,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,64,0,1,float16,fp8,16383,0.02937600016593933
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,fp8,31,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,float16,127,0.010645333677530289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,float16,63,0.009877333417534828
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,float16,255,0.009701333319147428
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,float16,511,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,fp8,511,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,float16,2047,0.01580799991885821
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,float16,1023,0.012789333860079447
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,fp8,2047,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,fp8,4095,0.02143999934196472
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,fp8,8191,0.029194665451844532
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,fp8,16383,0.05160533388455709
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,float16,16383,0.059989333152770996
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,64,64,0,1,float16,float16,1,0.04388799766699473
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,64,64,0,1,float16,fp8,1,0.04182399809360504
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,64,64,0,1,float16,float16,3,0.04353066782156626
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,64,64,0,1,float16,fp8,3,0.042122667034467064
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,64,64,0,1,float16,float16,7,0.0436106671889623
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,64,64,0,1,float16,fp8,7,0.04201066493988037
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,64,64,0,1,float16,float16,15,0.04367466767628988
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,64,64,0,1,float16,fp8,15,0.0420959989229838
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,64,64,0,1,float16,float16,31,0.04358399907747904
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,64,64,0,1,float16,float16,63,0.04363733530044556
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,64,64,0,1,float16,fp8,31,0.04223466912905375
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,64,64,0,1,float16,fp8,63,0.04167466859022776
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,64,64,0,1,float16,float16,127,0.04455466568470001
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,64,64,0,1,float16,fp8,127,0.04147200038035711
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,64,64,0,1,float16,fp8,255,0.041120000183582306
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,64,64,0,1,float16,float16,255,0.04613333443800608
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,64,64,0,1,float16,float16,511,0.06793599824110667
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,float16,8191,0.03570666660865148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,float16,4095,0.021488000949223835
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,64,64,0,1,float16,fp8,511,0.06037333110968272
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,fp8,127,0.010890666395425797
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,64,64,0,1,float16,float16,1023,0.10699733098347981
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,fp8,63,0.01089599976936976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,64,64,0,1,float16,float16,2047,0.1872319976488749
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,fp8,1,0.010901333143313726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,64,64,0,1,float16,fp8,2047,0.16553599635759988
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,fp8,1023,0.011242666592200598
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,float16,7,0.011173332730929056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,fp8,7,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,64,0,1,float16,fp8,255,0.011146667102972666
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,float16,15,0.011424000064531961
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,fp8,15,0.011066666493813196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,fp8,31,0.011146667102972666
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,float16,1,0.011061333119869232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,fp8,63,0.01109333336353302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,float16,3,0.011125333607196808
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,float16,127,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,fp8,127,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,float16,255,0.011242666592200598
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,fp8,255,0.011109333485364914
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,float16,511,0.012800000607967377
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,fp8,511,0.012890666723251343
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,float16,1023,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,fp8,1023,0.015013333410024643
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,64,64,0,1,float16,fp8,1023,0.09411733349164327
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,float16,63,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,float16,2047,0.024858665963013966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,fp8,2047,0.02295999974012375
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,float16,4095,0.03938133269548416
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,float16,31,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,fp8,3,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,fp8,4095,0.03150933235883713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,float16,8191,0.062208001812299095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,fp8,8191,0.05587733288606008
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,float16,16383,0.10693867007891338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,64,0,1,float16,fp8,16383,0.09273067116737366
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,64,64,0,1,float16,fp8,1,0.07223999996980031
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,64,64,0,1,float16,float16,1,0.0781173308690389
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,64,64,0,1,float16,float16,3,0.07829866806666057
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,64,64,0,1,float16,fp8,15,0.07226133346557617
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,64,64,0,1,float16,float16,31,0.07853333155314128
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,64,64,0,1,float16,fp8,3,0.07227199772993724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,64,64,0,1,float16,float16,7,0.07863999903202057
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,64,64,0,1,float16,fp8,7,0.07245866457621257
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,64,64,0,1,float16,float16,15,0.07808533310890198
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,64,64,0,1,float16,fp8,31,0.07222400108973186
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,64,64,0,1,float16,float16,63,0.0786293347676595
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,64,64,0,1,float16,fp8,63,0.07235733171304067
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,64,64,0,1,float16,float16,127,0.07952000200748444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,64,64,0,1,float16,fp8,127,0.07250666618347168
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,64,64,0,1,float16,float16,255,0.07834133505821228
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,64,64,0,1,float16,fp8,255,0.07255466779073079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,64,64,0,1,float16,float16,511,0.12337066729863484
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,64,64,0,1,float16,fp8,511,0.10940266648928325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,64,64,0,1,float16,float16,1023,0.20152533054351807
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,64,64,0,1,float16,fp8,1023,0.17655466000239053
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,64,64,0,1,float16,float16,1,0.1479200025399526
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,64,64,0,1,float16,fp8,1,0.13433067003885904
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,64,64,0,1,float16,float16,3,0.14647466937700906
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,64,64,0,1,float16,fp8,3,0.13395733634630838
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,64,64,0,1,float16,fp8,15,0.1344373325506846
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,64,64,0,1,float16,float16,7,0.14798399806022644
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,64,64,0,1,float16,fp8,7,0.13495999574661255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,64,64,0,1,float16,float16,15,0.14783466855684915
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,64,64,0,1,float16,float16,31,0.1477013329664866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,64,64,0,1,float16,fp8,31,0.13396267096201578
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,64,64,0,1,float16,fp8,63,0.1338986655076345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,64,64,0,1,float16,float16,63,0.14601066708564758
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,64,64,0,1,float16,float16,127,0.14652799566586813
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,64,64,0,1,float16,fp8,127,0.13567999998728433
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,64,64,0,1,float16,float16,255,0.14387200276056925
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,64,64,0,1,float16,fp8,255,0.13182399670283
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,64,64,0,1,float16,float16,3,0.2834080060323079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,64,64,0,1,float16,float16,1,0.283461332321167
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,64,64,0,1,float16,fp8,1,0.26107199986775714
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,64,64,0,1,float16,fp8,3,0.2611946662267049
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,64,64,0,1,float16,float16,7,0.28352532784144086
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,64,64,0,1,float16,fp8,7,0.2613973418871562
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,64,64,0,1,float16,float16,15,0.2834239999453227
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,64,64,0,1,float16,fp8,15,0.26053333282470703
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,64,64,0,1,float16,float16,31,0.28337599833806354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,float16,1,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,64,64,0,1,float16,fp8,31,0.2606133421262105
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,64,64,0,1,float16,float16,63,0.28329600890477497
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,64,64,0,1,float16,fp8,63,0.2608746687571208
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,fp8,1,0.014378666877746582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,64,64,0,1,float16,float16,127,0.2813386718432109
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,float16,15,0.014949332922697067
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,64,64,0,1,float16,fp8,127,0.26030399401982623
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,float16,3,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,fp8,3,0.014794666320085526
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,float16,63,0.014826666563749313
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,float16,7,0.013157332936922709
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,fp8,7,0.014885333677132925
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,fp8,15,0.013194666554530462
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,float16,31,0.013114667187134424
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,fp8,31,0.014858666807413101
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,fp8,63,0.014730667074521383
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,float16,127,0.014885333677132925
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,fp8,127,0.013088000317414602
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,float16,255,0.013157332936922709
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,fp8,255,0.013487999637921652
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,float16,511,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,fp8,511,0.016832000265518825
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,float16,1023,0.023141334454218548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,fp8,1023,0.02102400114138921
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,float16,2047,0.03959999978542328
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,fp8,2047,0.03309866786003113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,float16,4095,0.06230400005976359
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,fp8,4095,0.055919999877611794
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,float16,8191,0.107013334830602
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,fp8,16383,0.16880534092585245
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,fp8,8191,0.09332266449928284
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,64,0,1,float16,float16,16383,0.19711466630299887
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,64,64,0,1,float16,float16,1,0.5538133382797241
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,64,64,0,1,float16,fp8,1,0.5107626517613729
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,64,64,0,1,float16,float16,3,0.5544533332188925
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,64,64,0,1,float16,fp8,3,0.5103040138880411
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,64,64,0,1,float16,float16,7,0.555077314376831
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,64,64,0,1,float16,fp8,7,0.5110453367233276
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,64,64,0,1,float16,float16,15,0.5536640087763468
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,64,64,0,1,float16,fp8,15,0.5104586680730184
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,64,64,0,1,float16,float16,31,0.5560799837112427
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,64,64,0,1,float16,fp8,31,0.5105973482131958
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,64,64,0,1,float16,float16,63,0.5542826652526855
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,64,64,0,1,float16,fp8,63,0.510047992070516
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,64,64,0,1,float16,float16,1,1.0978506406148274
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,64,64,0,1,float16,fp8,1,1.0078612963358562
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,64,64,0,1,float16,float16,3,1.0963892936706543
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,64,64,0,1,float16,fp8,3,1.0087733268737793
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,64,64,0,1,float16,float16,7,1.0961226622263591
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,64,64,0,1,float16,fp8,7,1.0078720251719158
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,float16,1,0.01894933357834816
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,fp8,1,0.017071999609470367
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,64,64,0,1,float16,float16,15,1.0977493127187092
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,float16,3,0.018901333212852478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,fp8,3,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,fp8,7,0.01724799970785777
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,float16,7,0.01754666616519292
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,float16,31,0.017514667163292568
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,float16,15,0.019013332823912304
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,64,64,0,1,float16,float16,31,1.0957866509755452
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,fp8,63,0.017194667210181553
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,fp8,15,0.01722666621208191
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,fp8,31,0.017322666943073273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,64,64,0,1,float16,fp8,15,1.0088693300882976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,float16,63,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,float16,127,0.01904533306757609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,fp8,127,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,float16,255,0.017263999829689663
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,64,64,0,1,float16,fp8,31,1.0081439812978108
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,fp8,255,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,float16,511,0.0234400009115537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,fp8,511,0.02311466634273529
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,float16,1023,0.035605333745479584
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,fp8,1023,0.031119999786218006
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,float16,2047,0.059232001503308616
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,fp8,2047,0.05179733534653982
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,float16,4095,0.09886933366457622
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,fp8,4095,0.08647466699282329
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,float16,8191,0.17678399880727133
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,float16,1,0.029114666084448498
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,fp8,7,0.025349333882331848
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,64,0,1,float16,fp8,8191,0.15261333187421164
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,fp8,1,0.025381334125995636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,float16,3,0.02921066681543986
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,fp8,31,0.025360000630219776
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,fp8,3,0.025381334125995636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,float16,7,0.029504001140594482
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,float16,15,0.029296000798543293
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,fp8,15,0.02605333427588145
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,float16,31,0.029189333319664
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,float16,63,0.029071999092896778
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,float16,511,0.05421333511670431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,fp8,63,0.02651199946800868
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,float16,127,0.029258665939172108
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,fp8,127,0.025450666745503742
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,float16,255,0.03254399945338567
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,fp8,255,0.02513599892457326
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,fp8,511,0.03774400055408478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,float16,1023,0.09139733513196309
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,fp8,1023,0.057818666100502014
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,float16,2047,0.17312000195185342
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,fp8,2047,0.09903466701507568
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,float16,1,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,float16,4095,0.3278133273124695
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,64,128,0,1,float16,fp8,4095,0.17287466923395792
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,fp8,1,0.010378666842977205
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,fp8,15,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,float16,3,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,fp8,3,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,float16,7,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,fp8,7,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,float16,15,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,float16,31,0.009712000067035357
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,fp8,31,0.009589333087205887
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,float16,63,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,fp8,63,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,float16,127,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,fp8,127,0.010591999938090643
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,float16,255,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,fp8,255,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,fp8,2047,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,float16,511,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,fp8,511,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,float16,1023,0.012853333105643591
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,fp8,1023,0.012421333541472753
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,float16,2047,0.015050667027632395
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,float16,4095,0.018911999960740406
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,fp8,4095,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,float16,8191,0.035536001125971474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,fp8,8191,0.021221332252025604
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,float16,16383,0.05639466643333435
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,float16,1,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,64,128,0,1,float16,fp8,16383,0.03588266670703888
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,fp8,1,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,float16,3,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,fp8,3,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,float16,7,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,fp8,7,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,float16,15,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,fp8,15,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,float16,31,0.011114666859308878
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,fp8,31,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,float16,63,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,fp8,63,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,float16,127,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,fp8,127,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,float16,255,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,fp8,255,0.011071999867757162
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,float16,511,0.011098666737476984
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,fp8,2047,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,fp8,511,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,float16,1023,0.012960000584522883
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,float16,8191,0.05596266686916351
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,fp8,1023,0.01293333371480306
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,float16,2047,0.017130666722853977
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,float16,16383,0.09781866272290547
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,float16,4095,0.03518400092919668
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,fp8,4095,0.021386665602525074
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,fp8,8191,0.03544000039498011
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,64,128,0,1,float16,fp8,16383,0.05782933533191681
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,64,128,0,1,float16,float16,1,0.049679999550183616
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,64,128,0,1,float16,fp8,1,0.04170133173465729
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,64,128,0,1,float16,float16,15,0.049226666490236916
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,64,128,0,1,float16,float16,3,0.04970666766166687
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,64,128,0,1,float16,fp8,3,0.043525333205858864
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,64,128,0,1,float16,float16,7,0.049695998430252075
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,64,128,0,1,float16,float16,63,0.04969066878159841
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,64,128,0,1,float16,fp8,7,0.04180799921353658
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,64,128,0,1,float16,fp8,15,0.04181333382924398
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,64,128,0,1,float16,fp8,127,0.04225599765777588
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,64,128,0,1,float16,float16,31,0.04862933357556661
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,64,128,0,1,float16,fp8,31,0.043103997906049095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,64,128,0,1,float16,fp8,63,0.04326933125654856
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,64,128,0,1,float16,float16,127,0.05096533397833506
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,64,128,0,1,float16,float16,255,0.05230399966239929
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,64,128,0,1,float16,fp8,255,0.043509334325790405
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,64,128,0,1,float16,float16,511,0.09365333120028178
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,64,128,0,1,float16,fp8,511,0.06488533318042755
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,64,128,0,1,float16,float16,1023,0.1690453290939331
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,64,128,0,1,float16,fp8,1023,0.10259733597437541
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,64,128,0,1,float16,float16,2047,0.32817065715789795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,float16,1,0.012213333199421564
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,64,128,0,1,float16,fp8,2047,0.18083733320236206
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,fp8,1,0.011472000430027643
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,float16,3,0.012762666990359625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,float16,31,0.012879999975363413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,fp8,3,0.012719999998807907
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,float16,7,0.012917333592971167
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,fp8,7,0.012650666137536367
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,float16,15,0.012805332740147909
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,fp8,15,0.011109333485364914
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,fp8,31,0.010954666882753372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,fp8,255,0.012831999609867731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,float16,63,0.012789333860079447
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,fp8,63,0.011887999872366587
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,float16,127,0.012997332960367203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,fp8,1023,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,fp8,127,0.011125333607196808
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,float16,255,0.012885333349307379
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,float16,511,0.014122666170199713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,fp8,511,0.012879999975363413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,float16,1023,0.017562666287024815
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,float16,2047,0.037989333271980286
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,fp8,2047,0.02404266595840454
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,float16,4095,0.05809600154558817
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,fp8,4095,0.03910933434963226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,float16,8191,0.10100799798965454
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,fp8,8191,0.0602400004863739
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,float16,16383,0.1848693291346232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,64,128,0,1,float16,float16,1,0.08960533142089844
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,64,128,0,1,float16,fp8,1,0.07530666887760162
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,64,128,0,1,float16,fp8,16383,0.10290666421254475
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,64,128,0,1,float16,float16,3,0.0888426701227824
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,64,128,0,1,float16,fp8,3,0.0749013324578603
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,64,128,0,1,float16,float16,7,0.08992532889048259
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,64,128,0,1,float16,fp8,7,0.0759680022795995
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,64,128,0,1,float16,float16,15,0.08886933326721191
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,64,128,0,1,float16,fp8,15,0.0765226682027181
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,64,128,0,1,float16,float16,31,0.08925867080688477
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,64,128,0,1,float16,fp8,31,0.07499200105667114
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,64,128,0,1,float16,float16,63,0.08890666564305623
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,64,128,0,1,float16,fp8,63,0.07454933226108551
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,64,128,0,1,float16,float16,127,0.0885599950949351
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,64,128,0,1,float16,fp8,127,0.07656533519426982
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,64,128,0,1,float16,float16,255,0.09332799911499023
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,64,128,0,1,float16,fp8,1023,0.19114667177200317
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,64,128,0,1,float16,fp8,255,0.07446399827798207
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,64,128,0,1,float16,float16,511,0.1732693314552307
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,64,128,0,1,float16,fp8,511,0.11717866857846577
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,64,128,0,1,float16,float16,3,0.1667733391125997
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,64,128,0,1,float16,float16,1023,0.3240373333295186
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,64,128,0,1,float16,float16,1,0.16636799772580466
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,64,128,0,1,float16,fp8,1,0.14175466696421304
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,64,128,0,1,float16,fp8,3,0.14193066954612732
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,64,128,0,1,float16,float16,7,0.16670932372411093
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,64,128,0,1,float16,fp8,7,0.1418826679388682
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,64,128,0,1,float16,float16,15,0.16672533750534058
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,64,128,0,1,float16,fp8,15,0.1418186624844869
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,64,128,0,1,float16,float16,31,0.16674133141835532
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,64,128,0,1,float16,fp8,31,0.14181333780288696
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,64,128,0,1,float16,float16,63,0.16644799709320068
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,64,128,0,1,float16,fp8,63,0.14203733205795288
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,64,128,0,1,float16,float16,127,0.16506666938463846
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,64,128,0,1,float16,fp8,127,0.1418826679388682
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,64,128,0,1,float16,float16,255,0.17461333672205606
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,64,128,0,1,float16,float16,1,0.32171199719111127
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,64,128,0,1,float16,fp8,255,0.13773866494496664
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,64,128,0,1,float16,fp8,1,0.27300800879796344
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,64,128,0,1,float16,float16,3,0.32204266389211017
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,64,128,0,1,float16,fp8,3,0.2728640039761861
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,64,128,0,1,float16,float16,7,0.3222773273785909
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,64,128,0,1,float16,fp8,7,0.27296000719070435
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,64,128,0,1,float16,float16,15,0.3221973379453023
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,64,128,0,1,float16,fp8,15,0.2731413245201111
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,64,128,0,1,float16,float16,31,0.3220906654993693
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,64,128,0,1,float16,fp8,31,0.27299733956654865
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,64,128,0,1,float16,float16,63,0.32200533151626587
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,64,128,0,1,float16,fp8,63,0.2728853424390157
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,64,128,0,1,float16,fp8,127,0.274618665377299
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,float16,1,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,fp8,1,0.014954666296641031
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,64,128,0,1,float16,float16,127,0.31806933879852295
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,float16,3,0.01470400020480156
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,fp8,3,0.01481066644191742
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,float16,31,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,float16,7,0.014970666418472925
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,fp8,7,0.014794666320085526
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,float16,15,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,fp8,15,0.014469332993030548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,fp8,31,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,float16,63,0.014965333044528961
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,fp8,63,0.01482133318980535
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,float16,127,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,float16,1023,0.033215999603271484
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,fp8,127,0.014645333091417948
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,float16,255,0.014826666563749313
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,fp8,255,0.013189333180586496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,float16,511,0.019029332945744198
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,fp8,511,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,fp8,1023,0.023333333432674408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,float16,2047,0.060080001751581825
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,fp8,2047,0.039503999054431915
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,float16,4095,0.10105599959691365
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,fp8,4095,0.06006399790445963
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,float16,8191,0.18582934141159058
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,fp8,8191,0.10351999600728352
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,fp8,16383,0.1891146699587504
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,64,128,0,1,float16,float16,16383,0.3553119897842407
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,64,128,0,1,float16,float16,1,0.6315893332163492
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,64,128,0,1,float16,fp8,1,0.5332640012105306
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,64,128,0,1,float16,float16,3,0.6315199931462606
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,64,128,0,1,float16,fp8,3,0.5332853396733602
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,64,128,0,1,float16,fp8,7,0.533456007639567
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,64,128,0,1,float16,float16,7,0.6315893332163492
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,64,128,0,1,float16,float16,15,0.6315413316090902
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,64,128,0,1,float16,fp8,15,0.5329440037409464
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,64,128,0,1,float16,float16,31,0.6313279867172241
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,64,128,0,1,float16,fp8,31,0.5334453185399374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,64,128,0,1,float16,float16,63,0.631168007850647
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,64,128,0,1,float16,fp8,63,0.5325280030568441
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,64,128,0,1,float16,float16,1,1.2511040369669597
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,64,128,0,1,float16,fp8,1,1.0548746585845947
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,64,128,0,1,float16,float16,3,1.2516000270843506
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,64,128,0,1,float16,fp8,3,1.0540586312611897
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,float16,1,0.019258666783571243
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,fp8,1,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,64,128,0,1,float16,float16,7,1.2506986459096272
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,64,128,0,1,float16,fp8,7,1.0542079607645671
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,float16,3,0.019237333287795384
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,64,128,0,1,float16,fp8,15,1.0534133116404216
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,64,128,0,1,float16,float16,15,1.2512959639231365
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,fp8,3,0.01801066721479098
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,float16,7,0.019002666076024372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,fp8,7,0.018394666413466137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,float16,15,0.01931200052301089
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,fp8,15,0.017562666287024815
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,float16,31,0.01926400015751521
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,fp8,31,0.017093333105246227
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,float16,63,0.019178666174411774
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,fp8,63,0.017237332959969837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,float16,127,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,64,128,0,1,float16,fp8,31,1.054368019104004
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,64,128,0,1,float16,float16,31,1.2507413228352864
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,fp8,127,0.017818666994571686
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,float16,255,0.019285333653291065
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,fp8,255,0.017407999684413273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,float16,2047,0.094842662413915
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,float16,511,0.03332799921433131
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,fp8,511,0.022874665757020313
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,float16,4095,0.17284266153971353
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,fp8,4095,0.09473599990208943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,float16,1023,0.05284800132115682
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,fp8,1023,0.03549333413441976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,fp8,2047,0.05789333085219065
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,float16,1,0.023247999449570973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,float16,8191,0.32914666334788006
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,float16,7,0.02329600105683009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,fp8,1,0.021045332153638203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,64,128,0,1,float16,fp8,8191,0.17009600003560385
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,float16,3,0.023141334454218548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,float16,31,0.0230880007147789
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,fp8,3,0.021269333859284718
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,fp8,7,0.02126399924357732
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,fp8,63,0.021312000850836437
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,float16,15,0.023232000569502514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,fp8,15,0.02201066662867864
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,fp8,31,0.021210665504137676
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,fp8,255,0.021183999876181286
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,float16,63,0.02312533309062322
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,fp8,511,0.02959999938805898
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,float16,127,0.023077333966890972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,fp8,127,0.021546666820844013
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,float16,255,0.023007998863856
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,float16,511,0.03346666693687439
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,float16,1023,0.04966400067011515
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,fp8,1023,0.04451733330885569
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,float16,2047,0.08338133494059245
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,fp8,2047,0.07285333176453908
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,float16,4095,0.14562132954597473
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,float16,1,0.01091733326514562
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,48,48,64,0,1,float16,fp8,4095,0.12640000383059183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,fp8,1,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,float16,3,0.010970667004585266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,fp8,3,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,float16,7,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,fp8,7,0.009328000247478485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,float16,15,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,fp8,15,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,float16,31,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,fp8,31,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,float16,63,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,fp8,63,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,float16,127,0.009039999917149544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,fp8,127,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,float16,255,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,float16,2047,0.013242666920026144
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,fp8,255,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,float16,511,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,fp8,4095,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,fp8,511,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,float16,1023,0.013141332815090815
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,fp8,1023,0.013056000073750814
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,fp8,2047,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,float16,4095,0.01695999999841054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,float16,8191,0.01930133377512296
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,fp8,8191,0.01821333294113477
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,float16,16383,0.03025600065787633
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,float16,1,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,48,48,64,0,1,float16,fp8,16383,0.02515200028816859
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,fp8,1,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,fp8,15,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,float16,3,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,fp8,31,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,fp8,3,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,float16,7,0.009061333412925402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,fp8,7,0.010154666379094124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,float16,15,0.009349333122372627
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,float16,31,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,float16,63,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,fp8,63,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,float16,127,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,float16,1023,0.012613333761692047
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,fp8,127,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,float16,255,0.009050666665037474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,fp8,255,0.009610666582981745
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,float16,511,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,fp8,511,0.011055999745925268
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,float16,8191,0.03530666728814443
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,fp8,1023,0.012863999853531519
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,float16,2047,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,fp8,2047,0.014805333067973455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,float16,4095,0.021210665504137676
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,fp8,4095,0.019685332973798115
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,fp8,8191,0.029264000554879505
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,float16,16383,0.05841066439946493
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,48,48,64,0,1,float16,fp8,16383,0.051914667089780174
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,48,48,64,0,1,float16,float16,1,0.035349334279696144
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,48,48,64,0,1,float16,fp8,1,0.0334346666932106
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,48,48,64,0,1,float16,float16,3,0.03532266616821289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,48,48,64,0,1,float16,fp8,3,0.03331733246644338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,48,48,64,0,1,float16,float16,7,0.035642666121323906
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,48,48,64,0,1,float16,fp8,7,0.03335466732581457
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,48,48,64,0,1,float16,float16,15,0.03566399961709976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,48,48,64,0,1,float16,float16,127,0.035375999907652535
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,48,48,64,0,1,float16,fp8,15,0.03344533344109853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,48,48,64,0,1,float16,float16,31,0.03536533315976461
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,48,48,64,0,1,float16,fp8,31,0.033610666791598
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,48,48,64,0,1,float16,float16,63,0.03532800078392029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,48,48,64,0,1,float16,float16,511,0.05400000015894572
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,48,48,64,0,1,float16,fp8,63,0.03323733309904734
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,48,48,64,0,1,float16,fp8,127,0.033285332222779594
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,48,48,64,0,1,float16,float16,255,0.03653866549332937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,48,48,64,0,1,float16,fp8,255,0.03148266673088074
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,48,48,64,0,1,float16,float16,2047,0.14596266547838846
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,48,48,64,0,1,float16,fp8,511,0.04788800080617269
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,48,48,64,0,1,float16,float16,1023,0.08274133503437042
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,48,48,64,0,1,float16,fp8,1023,0.07452799876530965
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,48,48,64,0,1,float16,fp8,2047,0.12775466839472452
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,float16,1,0.01109333336353302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,fp8,1,0.011109333485364914
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,float16,3,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,fp8,3,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,float16,7,0.01091733326514562
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,fp8,7,0.011749333391586939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,float16,15,0.011055999745925268
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,fp8,15,0.01251199965675672
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,float16,31,0.01116266722480456
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,fp8,31,0.010970667004585266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,float16,63,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,fp8,63,0.010922666639089584
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,float16,127,0.011530666301647821
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,fp8,127,0.01108266661564509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,float16,255,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,fp8,255,0.011584000041087469
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,float16,511,0.012794667234023413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,fp8,2047,0.022986667851607006
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,fp8,511,0.012762666990359625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,float16,1023,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,fp8,1023,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,fp8,8191,0.05363733569780985
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,float16,2047,0.023141334454218548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,float16,4095,0.039306665460268654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,float16,16383,0.1069546639919281
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,fp8,4095,0.03154666721820831
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,float16,8191,0.06200533111890157
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,48,48,64,0,1,float16,float16,1,0.06017066538333893
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,48,48,64,0,1,float16,fp8,16383,0.09299199779828389
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,48,48,64,0,1,float16,fp8,1,0.05629333357016245
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,48,48,64,0,1,float16,float16,3,0.060991997520128884
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,48,48,64,0,1,float16,fp8,15,0.056976000467936196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,48,48,64,0,1,float16,fp8,3,0.0561653325955073
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,48,48,64,0,1,float16,float16,7,0.06021333237489065
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,48,48,64,0,1,float16,fp8,7,0.057717333237330117
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,48,48,64,0,1,float16,float16,127,0.06217599908510844
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,48,48,64,0,1,float16,float16,15,0.06027733286221822
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,48,48,64,0,1,float16,float16,31,0.06076799829800924
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,48,48,64,0,1,float16,fp8,31,0.05638933181762695
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,48,48,64,0,1,float16,float16,63,0.060906668504079185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,48,48,64,0,1,float16,fp8,63,0.0562720000743866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,48,48,64,0,1,float16,fp8,127,0.05609600245952606
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,48,48,64,0,1,float16,float16,255,0.06205333272616068
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,48,48,64,0,1,float16,fp8,255,0.05786666770776113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,48,48,64,0,1,float16,float16,511,0.09502399961153667
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,48,48,64,0,1,float16,fp8,511,0.08502399921417236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,48,48,64,0,1,float16,float16,3,0.11352533102035522
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,48,48,64,0,1,float16,float16,1023,0.15412267049153647
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,48,48,64,0,1,float16,fp8,1023,0.13567999998728433
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,48,48,64,0,1,float16,float16,1,0.11318399508794148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,48,48,64,0,1,float16,fp8,1,0.1029919981956482
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,48,48,64,0,1,float16,fp8,3,0.10315733154614766
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,48,48,64,0,1,float16,float16,7,0.11344533165295918
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,48,48,64,0,1,float16,fp8,7,0.10326932867368062
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,48,48,64,0,1,float16,float16,15,0.113237331310908
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,48,48,64,0,1,float16,fp8,15,0.10291733344395955
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,48,48,64,0,1,float16,float16,31,0.11355200409889221
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,48,48,64,0,1,float16,fp8,31,0.10276800394058228
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,48,48,64,0,1,float16,float16,63,0.11356799801190694
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,48,48,64,0,1,float16,fp8,63,0.10308800141016643
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,48,48,64,0,1,float16,float16,127,0.11308800180753072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,48,48,64,0,1,float16,fp8,127,0.10497066378593445
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,48,48,64,0,1,float16,float16,255,0.11102400223414104
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,48,48,64,0,1,float16,fp8,255,0.10089600086212158
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,48,48,64,0,1,float16,float16,1,0.21567465861638388
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,48,48,64,0,1,float16,float16,7,0.21584532658259073
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,48,48,64,0,1,float16,fp8,1,0.1991680065790812
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,48,48,64,0,1,float16,float16,3,0.2158986727396647
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,48,48,64,0,1,float16,fp8,3,0.19912532965342203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,48,48,64,0,1,float16,fp8,7,0.19934399922688803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,48,48,64,0,1,float16,float16,15,0.2155839999516805
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,48,48,64,0,1,float16,fp8,15,0.19871999820073447
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,48,48,64,0,1,float16,fp8,63,0.19921066363652548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,48,48,64,0,1,float16,float16,31,0.21564799547195435
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,48,48,64,0,1,float16,fp8,31,0.1991893251736959
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,48,48,64,0,1,float16,float16,63,0.21555199225743613
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,float16,1,0.012730666746695837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,48,48,64,0,1,float16,float16,127,0.2148053248723348
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,fp8,1,0.012858666479587555
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,float16,3,0.013199999928474426
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,48,48,64,0,1,float16,fp8,127,0.19728533426920572
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,fp8,3,0.012762666990359625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,float16,7,0.01302933320403099
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,fp8,7,0.012773333738247553
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,float16,15,0.013002666334311167
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,float16,127,0.013066666821638743
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,fp8,15,0.012693333129088083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,float16,31,0.012863999853531519
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,fp8,31,0.01293333371480306
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,float16,63,0.012944000462690989
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,fp8,63,0.012805332740147909
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,fp8,127,0.013066666821638743
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,float16,255,0.013093333691358566
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,fp8,255,0.012634667257467905
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,float16,511,0.015290666371583939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,fp8,511,0.014826666563749313
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,float16,1023,0.019023999571800232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,fp8,4095,0.04558933277924856
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,fp8,1023,0.01823466643691063
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,float16,2047,0.033189333975315094
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,fp8,2047,0.028186666468779247
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,float16,4095,0.05099200208981832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,float16,8191,0.08472533027331035
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,fp8,8191,0.0743093341588974
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,float16,16383,0.15175466736157736
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,48,48,64,0,1,float16,fp8,16383,0.13154133160909018
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,48,48,64,0,1,float16,float16,1,0.41872533162434894
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,48,48,64,0,1,float16,fp8,1,0.3858506679534912
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,48,48,64,0,1,float16,float16,3,0.4182986815770467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,48,48,64,0,1,float16,fp8,3,0.3847200075785319
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,48,48,64,0,1,float16,float16,7,0.418287992477417
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,48,48,64,0,1,float16,fp8,7,0.3855786720911662
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,48,48,64,0,1,float16,float16,15,0.4188053210576375
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,48,48,64,0,1,float16,fp8,15,0.38571735223134357
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,48,48,64,0,1,float16,float16,31,0.4192053476969401
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,48,48,64,0,1,float16,fp8,31,0.3859039942423503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,48,48,64,0,1,float16,float16,63,0.41842134793599445
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,48,48,64,0,1,float16,fp8,63,0.38543999195098877
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,48,48,64,0,1,float16,float16,1,0.8255573113759359
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,48,48,64,0,1,float16,fp8,1,0.7590880393981934
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,48,48,64,0,1,float16,float16,3,0.826586643854777
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,48,48,64,0,1,float16,fp8,3,0.7590826352437338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,48,48,64,0,1,float16,float16,7,0.8264053662618002
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,float16,1,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,48,48,64,0,1,float16,fp8,7,0.7582933108011881
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,fp8,1,0.015669333438078564
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,float16,3,0.017125333348910015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,48,48,64,0,1,float16,float16,15,0.8256639639536539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,fp8,3,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,float16,7,0.015477333217859268
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,fp8,7,0.01526933287580808
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,float16,15,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,fp8,15,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,float16,31,0.016677333662907284
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,fp8,31,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,48,48,64,0,1,float16,float16,31,0.8254613081614176
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,48,48,64,0,1,float16,fp8,15,0.7591093381245931
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,48,48,64,0,1,float16,fp8,31,0.759434700012207
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,fp8,255,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,float16,63,0.01659199967980385
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,fp8,63,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,float16,127,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,fp8,127,0.015285332997639975
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,float16,255,0.01710933322707812
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,float16,511,0.02096533278624217
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,float16,4095,0.08617599805196126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,fp8,4095,0.07606400052706401
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,fp8,511,0.01915733392039935
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,float16,1023,0.031258667508761086
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,float16,8191,0.15269333124160767
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,fp8,1023,0.027274665733178455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,float16,2047,0.051957334081331887
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,fp8,2047,0.04752000172932943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,float16,1,0.02533866713444392
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,48,48,64,0,1,float16,fp8,8191,0.1325226624806722
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,fp8,1,0.021312000850836437
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,float16,3,0.024442667762438457
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,float16,31,0.02481599897146225
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,fp8,31,0.021157334248224895
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,fp8,3,0.021168000996112823
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,float16,7,0.024826665719350178
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,fp8,7,0.021322667598724365
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,float16,15,0.025093334416548412
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,fp8,15,0.021173333128293354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,float16,63,0.02404266595840454
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,fp8,63,0.023103999594847362
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,float16,127,0.0233599990606308
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,float16,1023,0.07391466697057088
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,fp8,127,0.021274665991465252
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,float16,255,0.027109332382678986
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,fp8,255,0.021130666136741638
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,float16,511,0.04387733340263367
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,fp8,511,0.03169599920511246
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,fp8,1023,0.04795200129350027
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,float16,2047,0.13554666439692178
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,fp8,2047,0.08065066734949748
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,float16,4095,0.2547360062599182
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,float16,1,0.010159999753038088
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,48,48,128,0,1,float16,fp8,4095,0.13913066188494363
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,fp8,1,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,float16,3,0.009103999783595404
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,fp8,3,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,float16,7,0.008885333314538002
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,fp8,7,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,float16,15,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,fp8,15,0.010384000216921171
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,float16,31,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,fp8,31,0.010885333021481832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,float16,63,0.008885333314538002
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,fp8,63,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,float16,127,0.009594666461149851
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,fp8,127,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,float16,255,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,fp8,255,0.010645333677530289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,float16,511,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,fp8,511,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,float16,1023,0.014954666296641031
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,fp8,4095,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,fp8,1023,0.013082666943470636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,float16,2047,0.01540800059835116
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,fp8,2047,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,float16,16383,0.04452266792456309
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,float16,4095,0.019258666783571243
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,float16,8191,0.025909334421157837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,fp8,8191,0.02094399929046631
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,fp8,3,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,48,48,128,0,1,float16,fp8,16383,0.03130666663249334
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,float16,1,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,fp8,1,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,float16,3,0.010543999572594961
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,float16,7,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,fp8,7,0.010928000013033548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,float16,15,0.010911999891201654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,fp8,15,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,float16,31,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,fp8,31,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,float16,63,0.010570666442314783
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,fp8,63,0.010437333335479101
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,float16,127,0.01108266661564509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,fp8,127,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,float16,255,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,fp8,255,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,float16,511,0.011077333241701126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,fp8,511,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,float16,1023,0.013013333082199097
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,fp8,1023,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,float16,2047,0.017130666722853977
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,fp8,2047,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,float16,4095,0.030906667311986286
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,fp8,4095,0.021045332153638203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,48,48,128,0,1,float16,fp8,1,0.03535466641187668
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,48,48,128,0,1,float16,float16,3,0.03944533318281174
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,float16,8191,0.0516533354918162
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,fp8,8191,0.03527999917666117
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,float16,16383,0.09075199564297994
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,48,48,128,0,1,float16,float16,15,0.03957866628964742
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,48,48,128,0,1,float16,float16,1,0.038736000657081604
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,48,48,128,0,1,float16,float16,31,0.039461334546407066
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,48,48,128,0,1,float16,fp8,16383,0.05630933245023092
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,48,48,128,0,1,float16,fp8,3,0.033589333295822144
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,48,48,128,0,1,float16,float16,7,0.039450667798519135
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,48,48,128,0,1,float16,fp8,7,0.03497066597143809
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,48,48,128,0,1,float16,fp8,127,0.03335466732581457
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,48,48,128,0,1,float16,fp8,15,0.03536533315976461
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,48,48,128,0,1,float16,fp8,31,0.03510399907827377
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,48,48,128,0,1,float16,float16,63,0.03942399968703588
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,48,48,128,0,1,float16,fp8,63,0.03425599883000056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,48,48,128,0,1,float16,float16,127,0.040421334405740104
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,48,48,128,0,1,float16,float16,255,0.04278400043646494
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,48,48,128,0,1,float16,fp8,255,0.03562133262554804
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,48,48,128,0,1,float16,float16,511,0.07286400099595387
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,48,48,128,0,1,float16,fp8,511,0.05243200063705444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,48,48,128,0,1,float16,float16,1023,0.12962133685747781
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,48,48,128,0,1,float16,fp8,1023,0.0803413341442744
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,48,48,128,0,1,float16,float16,2047,0.24894400437672934
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,float16,1,0.011461333682139715
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,48,48,128,0,1,float16,fp8,2047,0.13980266451835632
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,fp8,1,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,float16,3,0.011157333850860596
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,fp8,3,0.01109333336353302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,float16,7,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,fp8,7,0.011749333391586939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,float16,15,0.012762666990359625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,fp8,15,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,float16,31,0.011183999478816986
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,fp8,31,0.010981333752473196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,float16,63,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,fp8,63,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,float16,127,0.012682666381200155
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,fp8,127,0.011663999408483505
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,fp8,1023,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,float16,255,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,fp8,255,0.011109333485364914
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,float16,511,0.01369599997997284
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,fp8,4095,0.037978666524092354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,fp8,511,0.012730666746695837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,float16,1023,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,fp8,8191,0.059936001896858215
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,float16,2047,0.031898667414983116
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,fp8,2047,0.02359466751416524
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,48,48,128,0,1,float16,fp8,1,0.058005332946777344
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,float16,4095,0.05641599992911021
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,float16,8191,0.09520000219345093
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,float16,16383,0.17430400848388672
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,48,48,128,0,1,float16,float16,1,0.07019199927647908
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,48,48,128,0,1,float16,fp8,16383,0.10190932949384053
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,48,48,128,0,1,float16,float16,3,0.07021333277225494
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,48,48,128,0,1,float16,fp8,3,0.058975999553998314
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,48,48,128,0,1,float16,float16,7,0.0703893353541692
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,48,48,128,0,1,float16,fp8,7,0.05794133245944977
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,48,48,128,0,1,float16,float16,15,0.07018133501211803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,48,48,128,0,1,float16,fp8,15,0.05788266658782959
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,48,48,128,0,1,float16,float16,31,0.07015466690063477
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,48,48,128,0,1,float16,fp8,127,0.05985066791375478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,48,48,128,0,1,float16,fp8,31,0.05820799867312113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,48,48,128,0,1,float16,float16,63,0.07005333403746287
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,48,48,128,0,1,float16,fp8,63,0.058837334314982094
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,48,48,128,0,1,float16,float16,511,0.13235200444857279
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,48,48,128,0,1,float16,float16,127,0.06991999844710033
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,48,48,128,0,1,float16,float16,255,0.07286933561166127
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,48,48,128,0,1,float16,float16,1023,0.2463093400001526
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,48,48,128,0,1,float16,fp8,255,0.05955199897289276
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,48,48,128,0,1,float16,fp8,511,0.09092799822489421
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,48,48,128,0,1,float16,fp8,1023,0.14754666884740195
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,48,48,128,0,1,float16,fp8,3,0.1090186635653178
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,48,48,128,0,1,float16,float16,1,0.12956800063451132
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,48,48,128,0,1,float16,fp8,1,0.10942932963371277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,48,48,128,0,1,float16,float16,3,0.1276853382587433
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,48,48,128,0,1,float16,float16,7,0.1291146675745646
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,48,48,128,0,1,float16,fp8,7,0.11050132910410564
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,48,48,128,0,1,float16,float16,15,0.1295413374900818
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,48,48,128,0,1,float16,float16,63,0.127893328666687
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,48,48,128,0,1,float16,fp8,15,0.10909333825111389
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,48,48,128,0,1,float16,float16,31,0.12963199615478516
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,48,48,128,0,1,float16,fp8,31,0.10923199852307637
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,48,48,128,0,1,float16,fp8,63,0.11008532842000325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,48,48,128,0,1,float16,float16,127,0.12757333119710287
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,48,48,128,0,1,float16,fp8,127,0.10931199789047241
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,48,48,128,0,1,float16,float16,1,0.24429333209991455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,48,48,128,0,1,float16,float16,255,0.13395733634630838
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,48,48,128,0,1,float16,fp8,255,0.10691733161608379
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,48,48,128,0,1,float16,fp8,1,0.20736533403396606
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,48,48,128,0,1,float16,float16,3,0.24429333209991455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,48,48,128,0,1,float16,fp8,3,0.20742400487263998
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,48,48,128,0,1,float16,float16,7,0.24427199363708496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,48,48,128,0,1,float16,fp8,7,0.20749332507451376
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,48,48,128,0,1,float16,float16,15,0.24452267090479532
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,48,48,128,0,1,float16,fp8,15,0.2074186603228251
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,48,48,128,0,1,float16,float16,31,0.24463466803232828
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,48,48,128,0,1,float16,fp8,31,0.2074506680170695
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,48,48,128,0,1,float16,float16,63,0.24423466126124063
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,48,48,128,0,1,float16,fp8,63,0.20754132668177286
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,float16,3,0.012826666235923767
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,float16,1,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,fp8,7,0.012879999975363413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,48,48,128,0,1,float16,float16,127,0.24231467644373575
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,fp8,15,0.012810666114091873
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,float16,31,0.013797332843144735
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,fp8,1,0.012970666090647379
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,48,48,128,0,1,float16,fp8,127,0.20714133977890015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,fp8,3,0.012773333738247553
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,float16,7,0.012896000097195307
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,float16,15,0.013045333325862885
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,fp8,31,0.012837332983811697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,float16,63,0.013178666432698568
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,fp8,63,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,float16,127,0.013194666554530462
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,fp8,127,0.012997332960367203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,float16,255,0.012949333836634954
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,fp8,255,0.012837332983811697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,fp8,2047,0.033615998923778534
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,float16,511,0.017114666601022083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,fp8,511,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,float16,1023,0.024911999702453613
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,fp8,1023,0.019178666174411774
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,float16,2047,0.0483893354733785
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,float16,4095,0.08040533463160197
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,fp8,4095,0.0496319979429245
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,float16,8191,0.14294933279355368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,fp8,8191,0.08142933249473572
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,float16,16383,0.2702239950497945
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,48,48,128,0,1,float16,fp8,16383,0.14626666903495789
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,48,48,128,0,1,float16,float16,1,0.47756799062093097
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,48,48,128,0,1,float16,fp8,1,0.40396801630655926
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,48,48,128,0,1,float16,float16,3,0.4775306781133016
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,48,48,128,0,1,float16,fp8,3,0.4028533299763997
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,48,48,128,0,1,float16,float16,7,0.4775306781133016
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,48,48,128,0,1,float16,fp8,7,0.4025866587956746
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,48,48,128,0,1,float16,float16,15,0.4780106544494629
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,48,48,128,0,1,float16,fp8,15,0.40379734834035236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,48,48,128,0,1,float16,float16,31,0.47763200600941974
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,48,48,128,0,1,float16,fp8,31,0.4040106534957886
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,48,48,128,0,1,float16,float16,63,0.4777119954427083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,48,48,128,0,1,float16,fp8,63,0.4021759827931722
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,48,48,128,0,1,float16,fp8,1,0.7955199877421061
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,48,48,128,0,1,float16,float16,1,0.9404906431833903
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,48,48,128,0,1,float16,float16,3,0.9434080123901367
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,48,48,128,0,1,float16,fp8,3,0.7951467037200928
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,48,48,128,0,1,float16,fp8,7,0.7949653466542562
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,48,48,128,0,1,float16,float16,7,0.9430293242136637
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,float16,1,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,48,48,128,0,1,float16,float16,15,0.9424693584442139
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,fp8,1,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,float16,3,0.01725333308180173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,48,48,128,0,1,float16,fp8,15,0.7970346609751383
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,fp8,3,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,fp8,15,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,float16,7,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,fp8,7,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,float16,15,0.017114666601022083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,float16,31,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,fp8,31,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,48,48,128,0,1,float16,float16,31,0.9423147042592367
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,fp8,127,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,48,48,128,0,1,float16,fp8,31,0.7937173048655192
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,float16,63,0.01718933383623759
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,fp8,63,0.01704000060757001
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,float16,127,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,float16,255,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,fp8,255,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,float16,2047,0.08030400176843007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,float16,511,0.02743999908367793
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,fp8,511,0.02070933332045873
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,float16,1023,0.0454720010360082
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,fp8,1023,0.031162666777769726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,fp8,2047,0.05130666494369507
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,float16,4095,0.1442080040772756
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,fp8,4095,0.08255466818809509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,float16,1,0.021013334393501282
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,float16,8191,0.2714719971021016
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,48,48,128,0,1,float16,fp8,8191,0.1460640033086141
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,fp8,1,0.019253333409627277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,float16,3,0.01966933285196622
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,fp8,3,0.019237333287795384
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,float16,7,0.021045332153638203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,fp8,7,0.01931200052301089
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,float16,15,0.02124800036350886
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,fp8,63,0.019146667172511418
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,fp8,15,0.018960000326236088
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,float16,31,0.01930133377512296
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,fp8,31,0.019002666076024372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,fp8,255,0.01926933353145917
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,float16,63,0.019258666783571243
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,fp8,511,0.02517866591612498
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,float16,127,0.020330666253964107
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,fp8,127,0.019280000279347103
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,float16,255,0.020997333029905956
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,float16,511,0.027456000447273254
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,float16,1023,0.04288533329963684
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,fp8,1023,0.035589332381884255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,fp8,4095,0.10634666681289673
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,float16,2047,0.07116266588370006
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,fp8,2047,0.06366933385531108
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,40,40,64,0,1,float16,float16,4095,0.12144000331560771
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,float16,1,0.008816000074148178
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,fp8,1,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,fp8,15,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,float16,3,0.008821333448092142
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,fp8,3,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,float16,7,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,fp8,7,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,float16,15,0.009285333255926767
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,float16,31,0.010501333822806677
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,fp8,31,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,float16,63,0.010293333480755487
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,fp8,63,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,fp8,511,0.010949333508809408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,float16,127,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,fp8,127,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,float16,2047,0.012778667112191519
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,float16,255,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,fp8,255,0.01097600037852923
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,float16,511,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,float16,8191,0.01903466631968816
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,float16,1023,0.012800000607967377
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,fp8,1023,0.012298667182525
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,fp8,2047,0.012938667088747025
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,float16,16383,0.02717866748571396
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,float16,1,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,float16,4095,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,fp8,4095,0.014837333311637243
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,fp8,8191,0.017071999609470367
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,40,40,64,0,1,float16,fp8,16383,0.023418667415777843
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,fp8,1,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,float16,3,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,fp8,3,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,float16,7,0.011018666128317514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,fp8,7,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,float16,15,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,fp8,15,0.010970667004585266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,float16,31,0.009082666908701261
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,fp8,31,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,float16,63,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,fp8,63,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,float16,127,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,fp8,127,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,float16,255,0.008901333436369896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,fp8,255,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,float16,511,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,float16,4095,0.021216000119845074
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,fp8,511,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,float16,1023,0.012757333616415659
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,float16,8191,0.03333866596221924
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,fp8,1023,0.01108266661564509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,float16,2047,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,fp8,2047,0.014767999450365702
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,fp8,4095,0.01926400015751521
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,fp8,8191,0.029093332588672638
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,float16,16383,0.05825066566467285
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,40,40,64,0,1,float16,float16,1,0.03134933362404505
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,40,40,64,0,1,float16,float16,7,0.03154666721820831
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,40,40,64,0,1,float16,fp8,16383,0.04974400003751119
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,40,40,64,0,1,float16,fp8,1,0.02922666569550832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,40,40,64,0,1,float16,fp8,15,0.029338667790095013
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,40,40,64,0,1,float16,float16,3,0.0314026673634847
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,40,40,64,0,1,float16,fp8,31,0.02926933268706004
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,40,40,64,0,1,float16,fp8,3,0.029461334149042766
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,40,40,64,0,1,float16,fp8,7,0.029477333029111225
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,40,40,64,0,1,float16,float16,15,0.03127466638882955
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,40,40,64,0,1,float16,float16,31,0.031541332602500916
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,40,40,64,0,1,float16,float16,63,0.031258667508761086
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,40,40,64,0,1,float16,fp8,63,0.029466666281223297
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,40,40,64,0,1,float16,float16,511,0.04784533381462097
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,40,40,64,0,1,float16,float16,127,0.03125333289305369
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,40,40,64,0,1,float16,fp8,127,0.029445332785447437
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,40,40,64,0,1,float16,float16,255,0.03146666785081228
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,40,40,64,0,1,float16,fp8,255,0.029301332930723827
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,40,40,64,0,1,float16,fp8,511,0.04181333382924398
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,40,40,64,0,1,float16,float16,1023,0.07246933380762736
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,40,40,64,0,1,float16,fp8,1023,0.06412266691525777
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,40,40,64,0,1,float16,float16,2047,0.1257973313331604
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,float16,1,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,float16,7,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,fp8,7,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,40,40,64,0,1,float16,fp8,2047,0.11071466406186421
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,fp8,1,0.011061333119869232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,float16,3,0.010970667004585266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,fp8,3,0.011178666104873022
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,float16,15,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,fp8,15,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,float16,31,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,fp8,31,0.011087999989589056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,float16,63,0.011136000355084738
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,fp8,63,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,float16,127,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,fp8,127,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,float16,255,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,fp8,255,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,float16,511,0.013013333082199097
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,fp8,511,0.012714666624863943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,float16,1023,0.015775999675194424
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,fp8,1023,0.014858666807413101
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,float16,2047,0.023344000180562336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,fp8,2047,0.021327999730904896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,float16,4095,0.03555200000603994
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,40,40,64,0,1,float16,float16,1,0.051925331354141235
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,fp8,4095,0.03127466638882955
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,float16,8191,0.06161599854628245
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,40,40,64,0,1,float16,float16,3,0.05173333485921224
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,fp8,8191,0.050586665670077004
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,float16,16383,0.10672533512115479
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,40,40,64,0,1,float16,float16,15,0.05194133520126343
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,40,40,64,0,1,float16,fp8,16383,0.09212266405423482
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,40,40,64,0,1,float16,fp8,1,0.049642667174339294
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,40,40,64,0,1,float16,fp8,3,0.04975999891757965
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,40,40,64,0,1,float16,float16,7,0.05208533505598704
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,40,40,64,0,1,float16,fp8,7,0.049679999550183616
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,40,40,64,0,1,float16,fp8,15,0.04852266609668732
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,40,40,64,0,1,float16,float16,31,0.05182399849096934
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,40,40,64,0,1,float16,fp8,31,0.04962133367856344
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,40,40,64,0,1,float16,float16,63,0.051818668842315674
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,40,40,64,0,1,float16,fp8,63,0.049584001302719116
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,40,40,64,0,1,float16,float16,127,0.0533493310213089
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,40,40,64,0,1,float16,fp8,127,0.04800533254941305
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,40,40,64,0,1,float16,float16,255,0.05390933156013489
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,40,40,64,0,1,float16,fp8,255,0.04820266862710317
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,40,40,64,0,1,float16,float16,511,0.08213866750399272
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,40,40,64,0,1,float16,fp8,511,0.07361066838105519
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,40,40,64,0,1,float16,float16,1023,0.12967999776204428
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,40,40,64,0,1,float16,fp8,3,0.0885599950949351
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,40,40,64,0,1,float16,fp8,1023,0.1153706709543864
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,40,40,64,0,1,float16,float16,1,0.09475200374921162
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,40,40,64,0,1,float16,fp8,1,0.08689600229263306
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,40,40,64,0,1,float16,float16,3,0.09512000282605489
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,40,40,64,0,1,float16,float16,7,0.09494400024414062
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,40,40,64,0,1,float16,fp8,7,0.0867680013179779
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,40,40,64,0,1,float16,float16,63,0.09506133198738098
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,40,40,64,0,1,float16,float16,15,0.09510933359464009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,40,40,64,0,1,float16,fp8,15,0.08852799733479817
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,40,40,64,0,1,float16,float16,31,0.09545600414276123
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,40,40,64,0,1,float16,fp8,31,0.08749333024024963
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,40,40,64,0,1,float16,fp8,63,0.08690667152404785
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,40,40,64,0,1,float16,float16,127,0.0958079993724823
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,40,40,64,0,1,float16,fp8,127,0.08674133817354839
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,40,40,64,0,1,float16,fp8,1,0.16646400094032288
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,40,40,64,0,1,float16,float16,255,0.09473066528638203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,40,40,64,0,1,float16,fp8,255,0.08674666285514832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,40,40,64,0,1,float16,float16,1,0.18307733535766602
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,40,40,64,0,1,float16,float16,7,0.18122132619222006
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,40,40,64,0,1,float16,float16,3,0.18152000506718954
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,40,40,64,0,1,float16,fp8,3,0.16679465770721436
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,40,40,64,0,1,float16,fp8,7,0.16639467080434164
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,40,40,64,0,1,float16,float16,15,0.18134933710098267
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,40,40,64,0,1,float16,fp8,15,0.16718933979670206
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,40,40,64,0,1,float16,float16,31,0.18265599012374878
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,40,40,64,0,1,float16,fp8,31,0.16646933555603027
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,40,40,64,0,1,float16,float16,63,0.18107734123865762
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,40,40,64,0,1,float16,fp8,63,0.1662666698296865
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,float16,1,0.012752000242471695
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,fp8,1,0.013093333691358566
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,40,40,64,0,1,float16,fp8,127,0.16658666729927063
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,40,40,64,0,1,float16,float16,127,0.1807466745376587
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,float16,3,0.012842666357755661
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,fp8,3,0.012842666357755661
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,float16,7,0.012874666601419449
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,fp8,7,0.012938667088747025
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,float16,15,0.012826666235923767
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,fp8,15,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,float16,31,0.012869333227475485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,fp8,31,0.013007999708255133
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,float16,63,0.012789333860079447
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,fp8,63,0.013072000195582708
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,float16,511,0.014773332824309668
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,float16,127,0.013002666334311167
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,fp8,127,0.012762666990359625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,float16,255,0.012837332983811697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,fp8,255,0.012778667112191519
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,fp8,2047,0.027221334477265675
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,fp8,511,0.014949332922697067
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,float16,1023,0.01926933353145917
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,fp8,1023,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,float16,2047,0.02922133356332779
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,float16,4095,0.050154666105906166
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,fp8,4095,0.0435146689414978
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,float16,8191,0.08469333251317342
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,fp8,8191,0.07292800148328145
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,float16,16383,0.15205867091814676
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,40,40,64,0,1,float16,fp8,16383,0.12971199552218118
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,40,40,64,0,1,float16,float16,1,0.3511253197987874
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,40,40,64,0,1,float16,fp8,1,0.3238933285077413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,40,40,64,0,1,float16,float16,3,0.35207998752593994
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,40,40,64,0,1,float16,fp8,3,0.32410667339960736
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,40,40,64,0,1,float16,float16,7,0.3526506821314494
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,40,40,64,0,1,float16,fp8,7,0.32345600922902423
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,40,40,64,0,1,float16,float16,15,0.3508853514989217
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,40,40,64,0,1,float16,fp8,15,0.3235306739807129
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,40,40,64,0,1,float16,float16,31,0.3521759907404582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,40,40,64,0,1,float16,fp8,31,0.3242879907290141
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,40,40,64,0,1,float16,float16,63,0.35226666927337646
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,40,40,64,0,1,float16,fp8,63,0.3245226740837097
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,40,40,64,0,1,float16,fp8,1,0.6335573196411133
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,40,40,64,0,1,float16,float16,1,0.690661350886027
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,40,40,64,0,1,float16,float16,3,0.6909920374552408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,40,40,64,0,1,float16,fp8,3,0.6348479986190796
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,40,40,64,0,1,float16,float16,7,0.6906987031300863
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,float16,1,0.015237333873907724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,40,40,64,0,1,float16,fp8,7,0.6352159976959229
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,fp8,1,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,40,40,64,0,1,float16,float16,15,0.690666675567627
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,float16,3,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,fp8,3,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,40,40,64,0,1,float16,fp8,15,0.6348160107930502
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,float16,7,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,fp8,7,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,float16,15,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,fp8,15,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,float16,31,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,fp8,31,0.014815999815861383
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,float16,63,0.014954666296641031
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,40,40,64,0,1,float16,float16,31,0.6905972957611084
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,fp8,63,0.014815999815861383
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,40,40,64,0,1,float16,fp8,31,0.6347200075785319
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,float16,127,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,fp8,127,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,float16,255,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,fp8,255,0.01481066644191742
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,float16,511,0.019296000401178997
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,fp8,511,0.018981333822011948
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,float16,1023,0.025407999753952026
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,fp8,1023,0.025120000044504803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,float16,8191,0.13125866651535034
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,float16,2047,0.0473280002673467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,fp8,2047,0.03944533318281174
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,float16,4095,0.07421866556008656
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,fp8,3,0.020309332758188248
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,fp8,4095,0.06596266726652782
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,float16,1,0.021253332495689392
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,40,40,64,0,1,float16,fp8,8191,0.11342400312423706
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,fp8,1,0.021183999876181286
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,float16,3,0.023077333966890972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,float16,7,0.021269333859284718
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,fp8,7,0.019226666539907455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,float16,15,0.021701333423455555
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,fp8,15,0.02096533278624217
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,float16,31,0.021317332983016968
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,fp8,31,0.019999999552965164
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,fp8,255,0.020608000457286835
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,float16,511,0.039461334546407066
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,float16,63,0.02317333221435547
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,fp8,63,0.01923199991385142
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,float16,127,0.021322667598724365
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,fp8,127,0.01926400015751521
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,float16,255,0.02186666677395503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,fp8,511,0.027109332382678986
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,float16,1023,0.06358399987220764
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,fp8,1023,0.04146666576464971
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,float16,2047,0.11519466837247212
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,fp8,2047,0.06850666801134746
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,float16,4095,0.21312532822291055
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,float16,1,0.009162666896979014
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,40,40,128,0,1,float16,fp8,4095,0.11754133303960164
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,fp8,7,0.009434666484594345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,fp8,1,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,float16,3,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,fp8,3,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,float16,7,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,float16,15,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,fp8,15,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,float16,31,0.009130666653315226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,fp8,31,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,float16,63,0.01101333275437355
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,fp8,63,0.010928000013033548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,float16,127,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,fp8,127,0.009493333597977957
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,float16,1023,0.012831999609867731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,float16,255,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,fp8,255,0.009408000235756239
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,float16,511,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,fp8,511,0.01109333336353302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,fp8,1023,0.012053333222866058
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,float16,2047,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,fp8,2047,0.013082666943470636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,float16,4095,0.016800000021855038
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,fp8,4095,0.01481066644191742
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,float16,8191,0.02094399929046631
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,fp8,8191,0.018965333700180054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,float16,1,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,float16,16383,0.04167999823888143
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,fp8,1,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,40,40,128,0,1,float16,fp8,16383,0.027061333258946735
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,float16,3,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,fp8,3,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,float16,7,0.010656000425418219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,fp8,7,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,float16,15,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,fp8,15,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,float16,31,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,fp8,31,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,float16,63,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,fp8,63,0.010890666395425797
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,float16,127,0.010608000059922537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,fp8,127,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,float16,255,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,fp8,255,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,float16,511,0.012202666451533636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,fp8,511,0.011098666737476984
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,float16,1023,0.013141332815090815
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,fp8,1023,0.012784000486135483
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,float16,2047,0.01716800034046173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,fp8,2047,0.016565332810084026
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,float16,4095,0.025125332176685333
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,fp8,4095,0.022650666534900665
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,float16,8191,0.05100800096988678
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,fp8,8191,0.03340800106525421
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,float16,16383,0.08781333764394124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,40,40,128,0,1,float16,fp8,16383,0.056517332792282104
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,40,40,128,0,1,float16,float16,1,0.03350399931271871
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,40,40,128,0,1,float16,fp8,1,0.03070933371782303
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,40,40,128,0,1,float16,float16,3,0.03399466723203659
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,40,40,128,0,1,float16,fp8,3,0.031285333136717476
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,40,40,128,0,1,float16,float16,7,0.035375999907652535
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,40,40,128,0,1,float16,float16,63,0.03409066547950109
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,40,40,128,0,1,float16,fp8,7,0.031258667508761086
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,40,40,128,0,1,float16,float16,15,0.03364799916744232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,40,40,128,0,1,float16,fp8,127,0.02951466788848241
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,40,40,128,0,1,float16,fp8,15,0.031290667752424874
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,40,40,128,0,1,float16,float16,31,0.03366400053103765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,40,40,128,0,1,float16,fp8,31,0.030933332939942677
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,40,40,128,0,1,float16,fp8,63,0.03121600051720937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,40,40,128,0,1,float16,float16,127,0.035391998787721
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,40,40,128,0,1,float16,float16,255,0.03895466774702072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,40,40,128,0,1,float16,fp8,255,0.029482667644818623
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,40,40,128,0,1,float16,float16,511,0.06398933132489522
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,40,40,128,0,1,float16,fp8,511,0.04578666885693868
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,40,40,128,0,1,float16,float16,1023,0.11129066348075867
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,40,40,128,0,1,float16,fp8,1023,0.07014933228492737
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,40,40,128,0,1,float16,float16,2047,0.21363200743993124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,float16,7,0.011087999989589056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,float16,1,0.011061333119869232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,fp8,15,0.010954666882753372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,40,40,128,0,1,float16,fp8,2047,0.12143466869990031
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,fp8,1,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,float16,3,0.011061333119869232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,fp8,3,0.01116266722480456
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,fp8,7,0.011087999989589056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,float16,15,0.011765333513418833
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,float16,31,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,fp8,31,0.011087999989589056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,float16,63,0.011114666859308878
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,fp8,63,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,float16,127,0.011120000233252844
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,fp8,127,0.011109333485364914
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,float16,255,0.010928000013033548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,fp8,255,0.011194666226704916
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,float16,511,0.012863999853531519
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,fp8,511,0.012970666090647379
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,fp8,4095,0.03531199942032496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,float16,1023,0.01722666621208191
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,fp8,1023,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,float16,2047,0.027210667729377747
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,fp8,2047,0.023103999594847362
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,float16,4095,0.055018668373425804
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,float16,8191,0.09368000427881877
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,fp8,8191,0.0599839985370636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,float16,16383,0.1721013387044271
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,40,40,128,0,1,float16,fp8,16383,0.10155733426411946
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,40,40,128,0,1,float16,float16,1,0.06025066475073496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,40,40,128,0,1,float16,fp8,1,0.049829334020614624
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,40,40,128,0,1,float16,float16,3,0.05983999868233999
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,40,40,128,0,1,float16,fp8,3,0.0499946673711141
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,40,40,128,0,1,float16,float16,7,0.05978666742642721
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,40,40,128,0,1,float16,fp8,31,0.05159999926884969
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,40,40,128,0,1,float16,fp8,7,0.04990933338801066
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,40,40,128,0,1,float16,float16,15,0.05948266883691152
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,40,40,128,0,1,float16,fp8,63,0.05006400247414907
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,40,40,128,0,1,float16,float16,127,0.06018666426340739
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,40,40,128,0,1,float16,fp8,15,0.05180266499519348
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,40,40,128,0,1,float16,fp8,127,0.052501335740089417
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,40,40,128,0,1,float16,float16,31,0.059877331058184304
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,40,40,128,0,1,float16,float16,63,0.0598826656738917
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,40,40,128,0,1,float16,float16,255,0.06319466729958852
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,40,40,128,0,1,float16,fp8,255,0.05175999800364176
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,40,40,128,0,1,float16,float16,511,0.11426132917404175
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,40,40,128,0,1,float16,fp8,511,0.07838933169841766
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,40,40,128,0,1,float16,float16,1023,0.20729599396387735
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,40,40,128,0,1,float16,fp8,1023,0.12441600362459819
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,40,40,128,0,1,float16,float16,1,0.10937066872914632
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,40,40,128,0,1,float16,fp8,1,0.093231995900472
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,40,40,128,0,1,float16,float16,3,0.1090666651725769
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,40,40,128,0,1,float16,fp8,3,0.09310932954152425
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,40,40,128,0,1,float16,float16,7,0.10964799920717876
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,40,40,128,0,1,float16,fp8,7,0.09258133172988892
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,40,40,128,0,1,float16,float16,15,0.10982400178909302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,40,40,128,0,1,float16,fp8,15,0.09252799550692241
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,40,40,128,0,1,float16,float16,31,0.10940800110499065
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,40,40,128,0,1,float16,float16,127,0.10903466741243999
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,40,40,128,0,1,float16,fp8,31,0.09337066610654195
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,40,40,128,0,1,float16,float16,63,0.10965333382288615
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,40,40,128,0,1,float16,fp8,63,0.09412266810735066
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,40,40,128,0,1,float16,fp8,127,0.09452266494433086
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,40,40,128,0,1,float16,float16,255,0.11526933312416077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,40,40,128,0,1,float16,float16,3,0.20729599396387735
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,40,40,128,0,1,float16,fp8,255,0.09300266702969869
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,40,40,128,0,1,float16,float16,1,0.2073813279469808
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,40,40,128,0,1,float16,fp8,1,0.1762239933013916
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,40,40,128,0,1,float16,fp8,3,0.17654399077097574
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,40,40,128,0,1,float16,float16,15,0.20733332633972168
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,40,40,128,0,1,float16,float16,7,0.2074026664098104
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,40,40,128,0,1,float16,fp8,7,0.175653338432312
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,40,40,128,0,1,float16,fp8,15,0.17597333590189615
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,40,40,128,0,1,float16,float16,31,0.2071466644605001
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,40,40,128,0,1,float16,fp8,31,0.1764799952507019
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,40,40,128,0,1,float16,float16,63,0.2072640061378479
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,40,40,128,0,1,float16,fp8,127,0.17491199572881064
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,float16,1,0.012896000097195307
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,40,40,128,0,1,float16,fp8,63,0.1752693255742391
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,fp8,1,0.012789333860079447
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,40,40,128,0,1,float16,float16,127,0.20517865816752115
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,float16,3,0.013232000172138214
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,fp8,3,0.01309866706530253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,float16,7,0.012863999853531519
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,fp8,7,0.012858666479587555
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,float16,15,0.012944000462690989
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,float16,127,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,fp8,15,0.012805332740147909
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,float16,31,0.012863999853531519
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,fp8,255,0.012842666357755661
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,fp8,31,0.012863999853531519
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,float16,63,0.013194666554530462
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,fp8,63,0.012815999488035837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,fp8,127,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,float16,255,0.012837332983811697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,float16,511,0.016255999604860943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,fp8,511,0.01481066644191742
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,fp8,4095,0.04956266780694326
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,float16,1023,0.021615999440352123
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,fp8,1023,0.01916266605257988
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,fp8,8191,0.082096000512441
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,float16,2047,0.04711999992529551
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,fp8,2047,0.029520000020662945
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,float16,4095,0.07738133271535237
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,float16,8191,0.13877866665522257
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,float16,16383,0.26206932465235394
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,40,40,128,0,1,float16,fp8,16383,0.14587199687957764
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,40,40,128,0,1,float16,float16,1,0.399344007174174
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,40,40,128,0,1,float16,fp8,1,0.3387306531270345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,40,40,128,0,1,float16,float16,3,0.40015467007954914
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,40,40,128,0,1,float16,fp8,3,0.3387840191523234
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,40,40,128,0,1,float16,float16,7,0.3997226556142171
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,40,40,128,0,1,float16,fp8,7,0.33881068229675293
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,40,40,128,0,1,float16,float16,15,0.399616003036499
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,40,40,128,0,1,float16,fp8,15,0.33866135279337567
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,40,40,128,0,1,float16,float16,31,0.3997226556142171
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,40,40,128,0,1,float16,fp8,31,0.33834131558736164
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,40,40,128,0,1,float16,float16,63,0.3998560110727946
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,40,40,128,0,1,float16,fp8,63,0.3384693463643392
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,40,40,128,0,1,float16,float16,1,0.7854133447011312
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,40,40,128,0,1,float16,fp8,1,0.6639573176701864
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,40,40,128,0,1,float16,float16,3,0.7851466337839762
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,40,40,128,0,1,float16,fp8,3,0.6627253293991089
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,40,40,128,0,1,float16,fp8,7,0.6632479826609293
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,40,40,128,0,1,float16,float16,7,0.7850026289621989
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,40,40,128,0,1,float16,float16,15,0.7862933476765951
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,float16,1,0.015557333827018738
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,fp8,1,0.014815999815861383
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,fp8,3,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,float16,3,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,float16,7,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,fp8,7,0.014789332946141561
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,float16,15,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,fp8,15,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,float16,31,0.014981333166360855
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,fp8,31,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,40,40,128,0,1,float16,fp8,31,0.6622879902521769
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,40,40,128,0,1,float16,fp8,15,0.6667306423187256
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,float16,63,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,40,40,128,0,1,float16,float16,31,0.7871466477711996
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,fp8,63,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,float16,127,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,fp8,127,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,float16,255,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,fp8,255,0.014901333798964819
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,float16,511,0.021231998999913532
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,fp8,511,0.01907733331123988
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,float16,1023,0.03956266740957896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,fp8,1023,0.025301332275072735
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,float16,2047,0.0701333334048589
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,fp8,2047,0.04557866851488749
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,float16,4095,0.12170132994651794
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,fp8,4095,0.07252266506354015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,float16,8191,0.22747200727462769
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,float16,1,0.01897066707412402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,40,40,128,0,1,float16,fp8,8191,0.12549333771069845
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,fp8,7,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,fp8,1,0.017664000391960144
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,float16,3,0.018960000326236088
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,fp8,3,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,float16,7,0.01903466631968816
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,fp8,15,0.017194667210181553
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,float16,15,0.01725333308180173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,fp8,127,0.017231999586025875
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,float16,31,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,fp8,255,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,fp8,31,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,float16,63,0.01893866683046023
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,fp8,63,0.017429333180189133
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,float16,127,0.018272000054518383
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,float16,255,0.017418666432301205
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,float16,511,0.023077333966890972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,fp8,511,0.021359999974568684
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,float16,1023,0.03562666724125544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,fp8,1023,0.03139200061559677
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,float16,2047,0.06015466650327047
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,fp8,2047,0.053818667928377785
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,float16,4095,0.09948800007502238
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,fp8,4095,0.08649599552154541
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,float16,1,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,float16,8191,0.1768959959348043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,64,0,1,float16,fp8,8191,0.15223466356595358
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,fp8,1,0.010138666878143946
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,float16,3,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,fp8,3,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,float16,7,0.010944000134865442
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,fp8,7,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,float16,15,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,fp8,15,0.010133333504199982
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,float16,31,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,fp8,31,0.01002133327225844
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,float16,63,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,fp8,63,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,float16,127,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,fp8,127,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,float16,255,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,fp8,255,0.009610666582981745
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,fp8,2047,0.011194666226704916
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,float16,511,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,fp8,511,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,float16,1023,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,fp8,1023,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,float16,2047,0.011541333049535751
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,float16,16383,0.02223466585079829
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,float16,4095,0.013776000589132309
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,fp8,4095,0.012874666601419449
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,float16,8191,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,float16,3,0.009253333633144697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,fp8,8191,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,fp8,16383,0.019082666685183842
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,float16,1,0.010634666929642359
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,float16,32767,0.037471999724706016
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,fp8,1,0.009445333232482275
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,64,0,1,float16,fp8,32767,0.029205332199732464
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,fp8,3,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,float16,7,0.009237333511312803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,fp8,7,0.00978133330742518
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,float16,15,0.01007466639081637
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,fp8,15,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,float16,31,0.009103999783595404
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,fp8,31,0.009797333429257074
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,float16,63,0.009114666531483332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,fp8,63,0.010911999891201654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,float16,127,0.009925333162148794
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,fp8,127,0.009685333197315535
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,float16,255,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,fp8,255,0.009189333145817121
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,float16,511,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,fp8,511,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,float16,1023,0.01267733300725619
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,fp8,1023,0.010965333630641302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,float16,2047,0.014938666174809137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,fp8,2047,0.01303999995191892
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,float16,4095,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,fp8,4095,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,float16,8191,0.02317333221435547
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,fp8,8191,0.021274665991465252
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,float16,1,0.025727999707063038
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,float16,16383,0.03764266769091288
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,fp8,16383,0.030495998760064442
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,float16,32767,0.060864001512527466
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,fp8,1,0.025173333783944447
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,64,0,1,float16,fp8,32767,0.05171200136343638
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,float16,3,0.02716800073782603
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,fp8,3,0.025125332176685333
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,float16,31,0.026170666019121807
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,float16,7,0.027114666998386383
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,fp8,7,0.025333332518736523
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,float16,15,0.02701333413521449
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,fp8,15,0.025429333249727886
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,fp8,31,0.02552533398071925
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,float16,63,0.025706666211287182
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,fp8,63,0.02514133354028066
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,float16,127,0.027082666754722595
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,fp8,127,0.025242666403452556
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,float16,255,0.026335999369621277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,fp8,255,0.02515733242034912
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,float16,2047,0.10313600301742554
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,float16,511,0.04004266609748205
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,fp8,511,0.035274667044480644
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,float16,4095,0.18105600277582803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,float16,1023,0.05989866455396017
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,fp8,1023,0.05373866856098175
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,fp8,2047,0.09073600172996521
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,float16,1,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,fp8,1,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,64,0,1,float16,fp8,4095,0.15677866339683533
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,float16,3,0.010938666760921478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,fp8,3,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,float16,7,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,fp8,7,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,float16,15,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,fp8,15,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,float16,31,0.010911999891201654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,fp8,31,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,float16,63,0.01091733326514562
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,fp8,63,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,float16,127,0.01101333275437355
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,fp8,127,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,float16,255,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,fp8,255,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,float16,511,0.011114666859308878
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,fp8,511,0.011173332730929056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,float16,1023,0.012821332861979803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,fp8,1023,0.013450667262077332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,float16,2047,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,fp8,2047,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,float16,4095,0.0230880007147789
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,fp8,4095,0.021327999730904896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,float16,8191,0.03958400090535482
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,fp8,8191,0.03125333289305369
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,float16,16383,0.0609386662642161
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,fp8,16383,0.05394133428732554
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,32,64,0,1,float16,float16,1,0.043749332427978516
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,float16,32767,0.1066986620426178
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,32,64,0,1,float16,fp8,1,0.04072533299525579
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,64,0,1,float16,fp8,32767,0.09299733241399129
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,32,64,0,1,float16,float16,3,0.04363200068473816
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,32,64,0,1,float16,fp8,3,0.041077333192030586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,32,64,0,1,float16,float16,7,0.04348800083001455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,32,64,0,1,float16,fp8,7,0.039813332259655
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,32,64,0,1,float16,float16,15,0.043578664461771645
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,32,64,0,1,float16,fp8,15,0.041434665520985924
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,32,64,0,1,float16,float16,31,0.04381866753101349
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,32,64,0,1,float16,fp8,31,0.041509332756201424
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,32,64,0,1,float16,fp8,255,0.03972800076007843
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,32,64,0,1,float16,float16,63,0.04355733096599579
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,32,64,0,1,float16,fp8,63,0.04068266600370407
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,32,64,0,1,float16,float16,127,0.04354133208592733
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,32,64,0,1,float16,fp8,127,0.03965866565704346
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,32,64,0,1,float16,float16,255,0.04432533184687296
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,32,64,0,1,float16,float16,511,0.06806933383146922
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,32,64,0,1,float16,fp8,511,0.0602400004863739
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,32,64,0,1,float16,float16,1023,0.10708799958229065
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,32,64,0,1,float16,fp8,1023,0.09471999605496724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,32,64,0,1,float16,float16,3,0.077674667040507
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,32,64,0,1,float16,float16,2047,0.1890773375829061
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,32,64,0,1,float16,fp8,2047,0.1644000013669332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,32,64,0,1,float16,float16,1,0.07751999795436859
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,32,64,0,1,float16,fp8,1,0.07117333511511485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,32,64,0,1,float16,fp8,3,0.07158933579921722
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,32,64,0,1,float16,float16,7,0.07834666470686595
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,32,64,0,1,float16,fp8,7,0.07222933570543925
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,32,64,0,1,float16,float16,15,0.0775626649459203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,32,64,0,1,float16,fp8,15,0.07228800157705943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,32,64,0,1,float16,float16,31,0.07737599809964497
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,32,64,0,1,float16,fp8,31,0.07111466427644093
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,32,64,0,1,float16,float16,63,0.07733866572380066
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,32,64,0,1,float16,fp8,63,0.07197333375612895
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,32,64,0,1,float16,float16,127,0.07840533554553986
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,32,64,0,1,float16,fp8,127,0.07134399811426799
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,32,64,0,1,float16,float16,255,0.07840000092983246
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,32,64,0,1,float16,fp8,255,0.0718560020128886
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,32,64,0,1,float16,float16,511,0.12160000205039978
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,32,64,0,1,float16,fp8,511,0.10909333825111389
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,32,64,0,1,float16,float16,1,0.1462559998035431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,32,64,0,1,float16,fp8,1,0.13368533054987589
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,32,64,0,1,float16,float16,3,0.14658133188883463
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,32,64,0,1,float16,fp8,3,0.13385599851608276
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,32,64,0,1,float16,float16,7,0.14628799756368002
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,32,64,0,1,float16,fp8,7,0.13362133502960205
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,32,64,0,1,float16,float16,15,0.14803199966748556
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,32,64,0,1,float16,fp8,15,0.1337440013885498
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,32,64,0,1,float16,float16,31,0.14627200365066528
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,32,64,0,1,float16,fp8,31,0.13362133502960205
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,32,64,0,1,float16,float16,63,0.1476853291193644
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,32,64,0,1,float16,fp8,63,0.13366400202115378
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,32,64,0,1,float16,float16,127,0.14619200428326926
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,32,64,0,1,float16,fp8,127,0.13570666313171387
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,float16,1,0.011493333925803503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,32,64,0,1,float16,float16,255,0.14402133226394653
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,fp8,1,0.012805332740147909
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,32,64,0,1,float16,fp8,255,0.13197867075602213
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,float16,3,0.012175999581813812
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,fp8,3,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,fp8,31,0.011221333096424738
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,float16,7,0.011173332730929056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,fp8,63,0.011136000355084738
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,fp8,7,0.012778667112191519
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,float16,15,0.01109333336353302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,float16,31,0.012538666526476542
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,fp8,15,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,float16,63,0.012096000214417776
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,float16,127,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,fp8,127,0.011482667177915573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,float16,255,0.011685332904259363
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,float16,2047,0.02405333270629247
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,fp8,255,0.012341332932313284
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,float16,511,0.012847999731699625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,fp8,511,0.012853333105643591
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,float16,1023,0.01590399940808614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,fp8,1023,0.015957333147525787
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,fp8,2047,0.02309866746266683
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,float16,4095,0.04035199930270513
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,fp8,4095,0.03156800071398417
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,float16,8191,0.06222933530807495
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,fp8,8191,0.054133335749308266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,float16,16383,0.10735467076301575
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,fp8,16383,0.0925333301226298
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,float16,32767,0.19722666343053183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,64,0,1,float16,fp8,32767,0.17222932974497476
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,32,64,0,1,float16,float16,1,0.28359999259312946
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,32,64,0,1,float16,fp8,1,0.2605813344319661
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,32,64,0,1,float16,float16,3,0.28330665826797485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,32,64,0,1,float16,fp8,3,0.2606079975763957
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,32,64,0,1,float16,float16,7,0.2834239999453227
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,32,64,0,1,float16,fp8,7,0.2600693305333455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,32,64,0,1,float16,float16,15,0.2831946611404419
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,32,64,0,1,float16,fp8,15,0.26070932547251385
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,32,64,0,1,float16,float16,31,0.28306132555007935
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,32,64,0,1,float16,fp8,31,0.2607146700223287
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,32,64,0,1,float16,float16,63,0.283242662747701
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,32,64,0,1,float16,fp8,63,0.2608960072199504
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,32,64,0,1,float16,float16,127,0.2812533378601074
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,32,64,0,1,float16,fp8,127,0.25918400287628174
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,32,64,0,1,float16,float16,1,0.5552586714426676
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,32,64,0,1,float16,fp8,1,0.5103679895401001
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,32,64,0,1,float16,float16,3,0.5552800099054972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,32,64,0,1,float16,fp8,3,0.5104586680730184
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,32,64,0,1,float16,fp8,7,0.510047992070516
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,32,64,0,1,float16,float16,7,0.558240016301473
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,32,64,0,1,float16,float16,15,0.5553653240203857
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,32,64,0,1,float16,fp8,15,0.5091679890950521
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,float16,1,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,fp8,1,0.013056000073750814
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,float16,3,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,fp8,3,0.013845333208640417
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,float16,7,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,fp8,7,0.014773332824309668
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,float16,15,0.013061333447694778
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,32,64,0,1,float16,float16,31,0.5545653502146403
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,fp8,15,0.013973332941532135
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,32,64,0,1,float16,fp8,31,0.5089866717656454
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,float16,31,0.01312000056107839
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,fp8,31,0.014837333311637243
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,32,64,0,1,float16,float16,63,0.555178682009379
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,32,64,0,1,float16,fp8,63,0.5100266536076864
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,float16,63,0.013162666310866674
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,fp8,63,0.013141332815090815
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,float16,127,0.013162666310866674
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,fp8,127,0.013189333180586496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,float16,255,0.012906666845083237
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,fp8,255,0.014757333944241205
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,float16,511,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,fp8,511,0.016832000265518825
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,float16,1023,0.023013333479563396
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,fp8,1023,0.021045332153638203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,float16,2047,0.03961066653331121
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,fp8,2047,0.03346133232116699
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,float16,4095,0.06385600070158641
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,fp8,4095,0.055760001142819725
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,float16,8191,0.10708266496658325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,fp8,8191,0.09401599566141765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,float16,1,0.018895999838908512
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,float16,16383,0.1971199909845988
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,fp8,1,0.017887999614079792
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,64,0,1,float16,fp8,16383,0.1699626644452413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,float16,15,0.018906666586796444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,float16,3,0.019205333044131596
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,fp8,3,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,float16,7,0.019285333653291065
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,fp8,7,0.017594666530688603
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,fp8,15,0.018218666315078735
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,float16,31,0.01897066707412402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,fp8,31,0.019018666197856266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,float16,63,0.019109333554903667
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,fp8,63,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,float16,511,0.03345066557327906
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,float16,127,0.019248000035683315
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,fp8,127,0.017551999539136887
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,float16,255,0.01921066641807556
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,fp8,255,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,fp8,511,0.023354666928450268
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,float16,1023,0.05213866631189982
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,fp8,1023,0.035189333061377205
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,float16,2047,0.09479467074076335
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,fp8,2047,0.058090666929880776
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,float16,4095,0.1723733345667521
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,fp8,4095,0.09469866752624512
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,float16,1,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,fp8,8191,0.17067732413609824
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,32,128,0,1,float16,float16,8191,0.3269760012626648
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,fp8,1,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,float16,3,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,fp8,3,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,float16,7,0.009125333279371262
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,fp8,7,0.010650667051474253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,float16,15,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,fp8,15,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,float16,31,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,fp8,31,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,float16,63,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,fp8,63,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,float16,127,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,fp8,127,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,float16,255,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,fp8,255,0.010634666929642359
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,float16,511,0.011109333485364914
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,fp8,511,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,float16,1023,0.011120000233252844
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,float16,8191,0.019226666539907455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,fp8,1023,0.011109333485364914
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,float16,2047,0.01313599944114685
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,fp8,2047,0.013301332791646322
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,float16,4095,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,fp8,4095,0.014202666779359182
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,fp8,8191,0.017050666113694508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,fp8,16383,0.023039999107519787
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,float16,16383,0.033557333052158356
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,float16,32767,0.056143999099731445
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,float16,1,0.010949333508809408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,fp8,1,0.009519999846816063
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,32,128,0,1,float16,fp8,32767,0.03626666714747747
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,float16,3,0.010426666587591171
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,float16,31,0.009429333110650381
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,fp8,3,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,float16,7,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,fp8,7,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,float16,15,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,fp8,15,0.011183999478816986
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,fp8,31,0.009429333110650381
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,float16,63,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,fp8,63,0.009482666850090027
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,float16,127,0.009621333330869675
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,fp8,127,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,float16,255,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,fp8,255,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,fp8,2047,0.014901333798964819
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,float16,511,0.011727999895811081
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,fp8,4095,0.01730666682124138
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,fp8,511,0.01101333275437355
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,float16,1023,0.012762666990359625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,fp8,1023,0.011391999820868174
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,float16,16383,0.057429333527882896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,float16,2047,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,float16,4095,0.01923199991385142
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,float16,8191,0.0355679988861084
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,fp8,8191,0.02347733328739802
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,fp8,16383,0.035936000446478523
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,float16,1,0.029258665939172108
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,float16,32767,0.09891200065612793
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,fp8,1,0.025402667621771496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,fp8,7,0.025498665869235992
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,float16,3,0.02945599953333537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,32,128,0,1,float16,fp8,32767,0.05834666887919108
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,fp8,3,0.02532800038655599
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,fp8,31,0.027119999130566914
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,float16,7,0.029482667644818623
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,float16,15,0.02921066681543986
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,fp8,15,0.025381334125995636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,float16,31,0.029189333319664
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,float16,255,0.03290133426586787
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,float16,63,0.02924266705910365
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,fp8,63,0.027130665878454845
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,float16,127,0.02922133356332779
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,fp8,127,0.026202666262785595
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,fp8,255,0.02537599951028824
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,float16,511,0.05378133555253347
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,fp8,511,0.03841600070397059
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,float16,1023,0.0906773308912913
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,fp8,1023,0.05774933099746704
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,float16,2047,0.1720693310101827
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,fp8,2047,0.09902933239936829
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,float16,1,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,fp8,3,0.011071999867757162
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,float16,4095,0.32515732447306317
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,32,128,0,1,float16,fp8,4095,0.17389333248138428
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,fp8,1,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,fp8,15,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,float16,3,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,float16,7,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,fp8,7,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,float16,127,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,float16,15,0.01108266661564509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,float16,31,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,fp8,31,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,float16,63,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,fp8,63,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,fp8,127,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,float16,255,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,fp8,255,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,float16,511,0.012869333227475485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,fp8,511,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,fp8,4095,0.023269332945346832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,float16,1023,0.013199999928474426
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,fp8,1023,0.013061333447694778
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,float16,2047,0.019146667172511418
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,fp8,2047,0.01714133347074191
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,float16,4095,0.03754666695992152
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,float16,8191,0.05820266902446747
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,fp8,8191,0.037861332297325134
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,32,128,0,1,float16,fp8,1,0.042064001162846885
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,float16,16383,0.10041600465774536
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,fp8,16383,0.06016000111897787
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,32,128,0,1,float16,float16,1,0.04784533381462097
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,float16,32767,0.18338133891423544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,32,128,0,1,float16,float16,3,0.049551998575528465
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,32,128,0,1,float16,fp8,32767,0.10220799843470256
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,32,128,0,1,float16,fp8,3,0.04171733558177948
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,32,128,0,1,float16,float16,7,0.04923733572165171
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,32,128,0,1,float16,fp8,7,0.04161066561937332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,32,128,0,1,float16,float16,15,0.0496373325586319
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,32,128,0,1,float16,fp8,15,0.04200533529122671
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,32,128,0,1,float16,float16,31,0.04970666766166687
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,32,128,0,1,float16,fp8,31,0.0428959975639979
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,32,128,0,1,float16,float16,63,0.0497920016447703
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,32,128,0,1,float16,fp8,63,0.04177066683769226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,32,128,0,1,float16,float16,127,0.05073066552480062
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,32,128,0,1,float16,fp8,127,0.041749333341916404
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,32,128,0,1,float16,float16,255,0.05292266607284546
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,32,128,0,1,float16,fp8,255,0.043466667334238686
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,32,128,0,1,float16,float16,511,0.09308800101280212
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,32,128,0,1,float16,fp8,511,0.0655626654624939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,32,128,0,1,float16,float16,1023,0.1688106656074524
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,32,128,0,1,float16,fp8,1023,0.10262399911880493
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,32,128,0,1,float16,float16,3,0.08918399612108867
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,32,128,0,1,float16,fp8,3,0.07449600100517273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,32,128,0,1,float16,float16,2047,0.32529600461324054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,32,128,0,1,float16,float16,1,0.08861333131790161
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,32,128,0,1,float16,fp8,2047,0.1809920072555542
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,32,128,0,1,float16,fp8,1,0.07459733386834462
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,32,128,0,1,float16,float16,7,0.08934932947158813
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,32,128,0,1,float16,fp8,7,0.07625600198904674
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,32,128,0,1,float16,float16,63,0.08885332942008972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,32,128,0,1,float16,fp8,63,0.07554666697978973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,32,128,0,1,float16,float16,15,0.08961600065231323
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,32,128,0,1,float16,fp8,15,0.07452266911665599
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,32,128,0,1,float16,float16,31,0.08875733613967896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,32,128,0,1,float16,fp8,31,0.07498133182525635
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,32,128,0,1,float16,float16,127,0.08864532907803853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,32,128,0,1,float16,fp8,127,0.07691200077533722
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,32,128,0,1,float16,float16,255,0.09281599521636963
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,32,128,0,1,float16,fp8,255,0.07537066439787547
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,32,128,0,1,float16,float16,511,0.17124267419179282
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,32,128,0,1,float16,fp8,511,0.11786666512489319
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,32,128,0,1,float16,float16,1,0.16644799709320068
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,32,128,0,1,float16,fp8,1,0.14178666472434998
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,32,128,0,1,float16,float16,3,0.16705065965652466
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,32,128,0,1,float16,fp8,3,0.14229333400726318
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,32,128,0,1,float16,fp8,7,0.1421333352724711
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,32,128,0,1,float16,float16,7,0.16779732704162598
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,32,128,0,1,float16,fp8,31,0.14204266667366028
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,32,128,0,1,float16,float16,15,0.16633599996566772
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,32,128,0,1,float16,fp8,15,0.1421333352724711
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,32,128,0,1,float16,fp8,63,0.14221866925557455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,32,128,0,1,float16,float16,31,0.1665440003077189
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,32,128,0,1,float16,float16,63,0.16696532567342123
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,32,128,0,1,float16,float16,127,0.1674720048904419
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,fp8,1,0.010944000134865442
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,32,128,0,1,float16,fp8,127,0.14194666345914206
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,float16,1,0.012869333227475485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,32,128,0,1,float16,float16,255,0.17430400848388672
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,32,128,0,1,float16,fp8,255,0.13979199528694153
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,float16,3,0.011717333147923151
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,fp8,3,0.01293333371480306
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,float16,7,0.011402666568756104
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,fp8,7,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,float16,15,0.012794667234023413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,fp8,15,0.01089599976936976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,float16,31,0.01369599997997284
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,fp8,31,0.011071999867757162
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,float16,63,0.012944000462690989
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,fp8,63,0.01209066684047381
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,float16,127,0.011786667009194693
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,fp8,127,0.012719999998807907
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,float16,255,0.011109333485364914
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,fp8,255,0.011125333607196808
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,float16,511,0.013765333841244379
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,fp8,511,0.01370666672786077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,fp8,4095,0.039450667798519135
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,float16,1023,0.018911999960740406
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,fp8,1023,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,float16,2047,0.03766400118668874
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,fp8,2047,0.025034666061401367
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,float16,4095,0.058490668733914696
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,float16,8191,0.10079999764760335
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,fp8,8191,0.061008001367251076
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,float16,16383,0.18523732821146646
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,fp8,16383,0.10318932930628459
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,fp8,32767,0.18690133094787598
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,32,128,0,1,float16,float16,32767,0.3543039957682292
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,32,128,0,1,float16,fp8,1,0.27287999788920086
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,32,128,0,1,float16,float16,1,0.3230560024579366
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,32,128,0,1,float16,float16,3,0.32233599821726483
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,32,128,0,1,float16,float16,7,0.3222399950027466
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,32,128,0,1,float16,fp8,3,0.274181326230367
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,32,128,0,1,float16,fp8,7,0.27351999282836914
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,32,128,0,1,float16,float16,15,0.322437326113383
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,32,128,0,1,float16,fp8,15,0.2731626629829407
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,32,128,0,1,float16,float16,31,0.32202666997909546
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,32,128,0,1,float16,float16,63,0.322437326113383
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,32,128,0,1,float16,fp8,31,0.2736799915631612
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,32,128,0,1,float16,fp8,63,0.2730293273925781
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,32,128,0,1,float16,fp8,127,0.27132266759872437
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,32,128,0,1,float16,float16,127,0.3210933407147725
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,32,128,0,1,float16,float16,1,0.6320000092188517
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,32,128,0,1,float16,fp8,1,0.5352533260981241
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,32,128,0,1,float16,float16,3,0.633135994275411
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,32,128,0,1,float16,fp8,3,0.5350293318430582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,32,128,0,1,float16,fp8,7,0.5339946746826172
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,32,128,0,1,float16,float16,15,0.632207989692688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,float16,1,0.014975999792416891
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,fp8,1,0.012826666235923767
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,float16,3,0.014767999450365702
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,32,128,0,1,float16,float16,7,0.6335039933522543
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,fp8,3,0.013007999708255133
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,float16,7,0.01526933287580808
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,32,128,0,1,float16,fp8,15,0.5343946615854899
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,32,128,0,1,float16,fp8,31,0.5348373254140218
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,32,128,0,1,float16,float16,31,0.6319040060043335
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,fp8,7,0.013141332815090815
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,float16,15,0.014778666198253632
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,fp8,15,0.014912000546852747
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,32,128,0,1,float16,float16,63,0.6328853368759155
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,32,128,0,1,float16,fp8,63,0.534602681795756
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,fp8,127,0.014826666563749313
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,float16,31,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,fp8,31,0.0145066666106383
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,float16,63,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,fp8,63,0.013007999708255133
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,float16,127,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,float16,255,0.014901333798964819
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,fp8,255,0.013210666676362356
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,fp8,2047,0.039359999199708305
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,float16,511,0.01926933353145917
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,fp8,4095,0.0613973339398702
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,fp8,511,0.017050666113694508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,float16,1023,0.03387733300526937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,fp8,1023,0.02316266546646754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,float16,2047,0.059061333537101746
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,float16,4095,0.10130666693051656
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,float16,8191,0.18521066506703696
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,fp8,8191,0.10496532917022705
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,float16,1,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,float16,16383,0.35363201300303143
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,32,128,0,1,float16,fp8,16383,0.18914665778477988
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,fp8,1,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,float16,3,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,float16,31,0.016309333344300587
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,fp8,3,0.014981333166360855
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,float16,7,0.016810666769742966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,fp8,7,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,float16,15,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,fp8,15,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,fp8,31,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,float16,63,0.017071999609470367
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,fp8,63,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,float16,127,0.016794666647911072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,fp8,127,0.016805333395799
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,float16,255,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,fp8,255,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,float16,2047,0.052986666560173035
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,float16,511,0.020848001043001812
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,fp8,511,0.01953599974513054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,float16,4095,0.08660800258318584
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,float16,1023,0.031221332649389904
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,fp8,1023,0.027466667195161183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,fp8,2047,0.04571199913819631
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,fp8,4095,0.0758240024248759
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,float16,1,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,float16,8191,0.1527733306090037
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,fp8,1,0.009008000294367472
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,64,0,1,float16,fp8,8191,0.13368533054987589
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,float16,3,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,fp8,3,0.009152000149091085
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,float16,7,0.009125333279371262
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,fp8,7,0.009045333291093508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,float16,15,0.009610666582981745
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,fp8,15,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,float16,31,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,fp8,31,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,float16,63,0.009061333412925402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,fp8,63,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,float16,127,0.010954666882753372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,fp8,127,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,float16,255,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,fp8,255,0.008954666554927826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,float16,511,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,fp8,511,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,float16,1023,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,fp8,1023,0.011055999745925268
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,float16,2047,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,fp8,2047,0.013082666943470636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,float16,4095,0.014874666929244995
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,fp8,4095,0.014197333405415217
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,float16,8191,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,fp8,8191,0.015237333873907724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,float16,16383,0.020960000654061634
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,fp8,16383,0.01916266605257988
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,float16,32767,0.031514666974544525
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,float16,1,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,64,0,1,float16,fp8,32767,0.025466665625572205
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,fp8,1,0.009285333255926767
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,float16,3,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,fp8,3,0.009002666920423508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,float16,7,0.009642666826645533
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,fp8,7,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,float16,15,0.00955200009047985
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,fp8,15,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,float16,31,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,fp8,31,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,float16,63,0.008943999807039896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,fp8,63,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,float16,127,0.009893333539366722
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,fp8,127,0.010645333677530289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,float16,255,0.00984533317387104
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,fp8,255,0.011018666128317514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,float16,511,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,fp8,511,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,float16,1023,0.012746666868527731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,fp8,1023,0.013056000073750814
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,float16,2047,0.014975999792416891
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,fp8,2047,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,float16,4095,0.017344000438849132
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,fp8,4095,0.01706133286158244
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,float16,8191,0.020928000410397846
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,fp8,8191,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,fp8,1,0.021375998854637146
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,float16,1,0.02388266722361247
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,float16,16383,0.031130666534105938
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,fp8,16383,0.025450666745503742
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,float16,32767,0.047983999053637184
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,64,0,1,float16,fp8,32767,0.041562666495641075
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,fp8,15,0.020992000897725422
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,float16,3,0.023002666731675465
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,fp8,3,0.020970667401949566
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,float16,7,0.023029332359631855
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,fp8,7,0.02170666555563609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,float16,15,0.02221333235502243
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,float16,31,0.02312533309062322
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,fp8,31,0.02125866711139679
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,float16,63,0.022005334496498108
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,fp8,63,0.02102400114138921
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,float16,127,0.02125866711139679
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,float16,1023,0.049626668294270836
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,fp8,127,0.021375998854637146
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,float16,255,0.022976001103719074
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,fp8,255,0.021002667645613354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,fp8,2047,0.07387733459472656
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,float16,511,0.03323200096686681
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,fp8,511,0.02812266598145167
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,fp8,1023,0.04375466704368591
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,float16,2047,0.0844533344109853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,float16,4095,0.14411733547846475
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,float16,1,0.010293333480755487
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,64,0,1,float16,fp8,4095,0.12623467048009238
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,fp8,1,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,float16,3,0.009045333291093508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,fp8,3,0.009914666414260864
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,float16,7,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,fp8,7,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,float16,15,0.00973866693675518
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,fp8,15,0.011274666835864386
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,float16,31,0.011178666104873022
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,fp8,31,0.009712000067035357
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,float16,63,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,fp8,63,0.010992000500361124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,float16,127,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,fp8,127,0.010597333312034607
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,float16,1023,0.013066666821638743
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,float16,255,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,fp8,255,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,float16,511,0.011077333241701126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,fp8,511,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,fp8,1023,0.012805332740147909
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,float16,2047,0.01703466723362605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,fp8,2047,0.016773333152135212
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,float16,4095,0.023503998915354412
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,fp8,4095,0.02201066662867864
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,float16,8191,0.037290667494138084
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,fp8,8191,0.029530666768550873
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,float16,16383,0.06155733267466227
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,fp8,16383,0.052042668064435325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,float16,32767,0.1051680048306783
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,24,24,64,0,1,float16,float16,1,0.03566399961709976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,24,24,64,0,1,float16,fp8,1,0.033488000432650246
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,64,0,1,float16,fp8,32767,0.09150399764378865
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,24,24,64,0,1,float16,float16,3,0.03540800015131632
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,24,24,64,0,1,float16,fp8,3,0.03364799916744232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,24,24,64,0,1,float16,float16,7,0.035504000882307686
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,24,24,64,0,1,float16,fp8,7,0.03324799984693527
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,24,24,64,0,1,float16,float16,15,0.03565866748491923
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,24,24,64,0,1,float16,fp8,15,0.03329599897066752
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,24,24,64,0,1,float16,float16,31,0.03535466641187668
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,24,24,64,0,1,float16,fp8,31,0.03329599897066752
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,24,24,64,0,1,float16,float16,63,0.0352906659245491
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,24,24,64,0,1,float16,fp8,63,0.0331839993596077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,24,24,64,0,1,float16,float16,127,0.03562133262554804
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,24,24,64,0,1,float16,fp8,127,0.033344000577926636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,24,24,64,0,1,float16,float16,255,0.03605333218971888
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,24,24,64,0,1,float16,fp8,255,0.03159466634194056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,24,24,64,0,1,float16,float16,511,0.053770666321118675
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,24,24,64,0,1,float16,fp8,511,0.048581331968307495
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,24,24,64,0,1,float16,fp8,1023,0.07256533205509186
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,24,24,64,0,1,float16,float16,1023,0.08239999910195668
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,24,24,64,0,1,float16,float16,2047,0.14563199877738953
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,24,24,64,0,1,float16,fp8,2047,0.12944533427556357
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,24,24,64,0,1,float16,float16,1,0.06125866870085398
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,24,24,64,0,1,float16,fp8,1,0.056101332108179726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,24,24,64,0,1,float16,float16,15,0.060229331254959106
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,24,24,64,0,1,float16,float16,3,0.06154666841030121
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,24,24,64,0,1,float16,fp8,3,0.05677866439024607
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,24,24,64,0,1,float16,float16,7,0.06011733412742615
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,24,24,64,0,1,float16,float16,63,0.06005333364009857
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,24,24,64,0,1,float16,fp8,7,0.05770133435726166
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,24,24,64,0,1,float16,fp8,15,0.06122666597366333
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,24,24,64,0,1,float16,float16,31,0.06121066709359487
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,24,24,64,0,1,float16,fp8,31,0.05579199890295664
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,24,24,64,0,1,float16,fp8,63,0.056186666091283165
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,24,24,64,0,1,float16,float16,127,0.062021334966023765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,24,24,64,0,1,float16,fp8,127,0.05608533322811127
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,24,24,64,0,1,float16,float16,255,0.06159999966621399
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,24,24,64,0,1,float16,float16,1,0.11314666271209717
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,24,24,64,0,1,float16,fp8,255,0.05764799813429514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,24,24,64,0,1,float16,float16,511,0.09533333778381348
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,24,24,64,0,1,float16,fp8,511,0.08478400111198425
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,24,24,64,0,1,float16,fp8,1,0.10312533378601074
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,24,24,64,0,1,float16,float16,3,0.11361066500345866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,24,24,64,0,1,float16,fp8,3,0.10288533568382263
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,24,24,64,0,1,float16,float16,7,0.11309867103894551
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,24,24,64,0,1,float16,float16,15,0.11329600214958191
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,24,24,64,0,1,float16,fp8,7,0.10341866811116536
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,24,24,64,0,1,float16,float16,31,0.11311466495196025
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,24,24,64,0,1,float16,fp8,15,0.10322667161623637
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,24,24,64,0,1,float16,float16,127,0.11242133378982544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,24,24,64,0,1,float16,fp8,31,0.10299733281135559
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,fp8,1,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,24,24,64,0,1,float16,fp8,127,0.10501866539319356
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,24,24,64,0,1,float16,float16,63,0.11315199732780457
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,24,24,64,0,1,float16,fp8,63,0.10283199946085612
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,float16,1,0.011786667009194693
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,24,24,64,0,1,float16,float16,255,0.11080533266067505
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,24,24,64,0,1,float16,fp8,255,0.10099732875823975
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,fp8,3,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,float16,3,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,float16,7,0.011120000233252844
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,fp8,7,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,float16,15,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,fp8,15,0.01180800050497055
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,float16,31,0.01145600030819575
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,fp8,31,0.011071999867757162
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,float16,63,0.011055999745925268
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,fp8,255,0.01098666712641716
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,float16,511,0.013023999830087027
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,fp8,63,0.011141333729028702
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,float16,127,0.011055999745925268
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,fp8,127,0.010992000500361124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,float16,255,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,fp8,511,0.013034666577974955
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,float16,1023,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,fp8,1023,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,float16,2047,0.02333866556485494
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,fp8,2047,0.022319999833901722
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,float16,4095,0.03783999880154928
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,fp8,4095,0.0315786674618721
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,float16,8191,0.06190933287143707
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,fp8,8191,0.05366933345794678
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,float16,16383,0.10697600245475769
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,fp8,16383,0.09244799613952637
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,float16,32767,0.19748800992965698
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,64,0,1,float16,fp8,32767,0.16879467169443765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,24,24,64,0,1,float16,float16,1,0.21561066309611002
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,24,24,64,0,1,float16,fp8,1,0.19909866650899252
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,24,24,64,0,1,float16,float16,3,0.21549334128697714
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,24,24,64,0,1,float16,fp8,3,0.1993173360824585
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,24,24,64,0,1,float16,float16,7,0.21556800603866577
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,24,24,64,0,1,float16,fp8,7,0.19824532667795816
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,24,24,64,0,1,float16,float16,15,0.21591466665267944
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,24,24,64,0,1,float16,fp8,15,0.19939200083414713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,24,24,64,0,1,float16,float16,31,0.21557333072026572
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,24,24,64,0,1,float16,fp8,31,0.197818656762441
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,24,24,64,0,1,float16,float16,63,0.2158880035082499
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,24,24,64,0,1,float16,fp8,63,0.19825067122777304
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,24,24,64,0,1,float16,float16,127,0.2140586574872335
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,24,24,64,0,1,float16,fp8,127,0.19766932725906372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,24,24,64,0,1,float16,float16,1,0.42020265261332196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,24,24,64,0,1,float16,fp8,1,0.3850133419036865
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,24,24,64,0,1,float16,float16,3,0.42297065258026123
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,24,24,64,0,1,float16,fp8,3,0.38581868012746173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,24,24,64,0,1,float16,float16,7,0.42020265261332196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,24,24,64,0,1,float16,fp8,7,0.3848586479822795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,float16,1,0.012869333227475485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,24,24,64,0,1,float16,float16,15,0.42016534010569256
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,fp8,1,0.012906666845083237
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,float16,3,0.01303999995191892
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,fp8,3,0.012986666212479273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,float16,7,0.012805332740147909
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,24,24,64,0,1,float16,fp8,15,0.3853813409805298
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,float16,15,0.013194666554530462
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,fp8,7,0.012879999975363413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,24,24,64,0,1,float16,fp8,31,0.3847999970118205
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,24,24,64,0,1,float16,float16,31,0.4196000099182129
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,fp8,15,0.012890666723251343
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,fp8,63,0.013157332936922709
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,float16,31,0.012815999488035837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,fp8,31,0.012981332838535309
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,float16,63,0.012954667210578918
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,fp8,255,0.012741333494583765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,24,24,64,0,1,float16,float16,63,0.42022399107615155
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,24,24,64,0,1,float16,fp8,63,0.3856213490168254
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,float16,127,0.012778667112191519
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,fp8,127,0.012773333738247553
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,float16,255,0.012901333471139273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,float16,511,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,fp8,511,0.014874666929244995
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,float16,1023,0.018933333456516266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,fp8,1023,0.019018666197856266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,float16,2047,0.034688000877698265
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,fp8,2047,0.027317332724730175
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,float16,4095,0.0513866643110911
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,fp8,4095,0.045567999283472695
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,float16,8191,0.08453866839408875
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,float16,3,0.01704000060757001
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,fp8,8191,0.07428800066312154
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,float16,16383,0.15161066253980002
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,float16,1,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,64,0,1,float16,fp8,16383,0.13160000244776407
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,fp8,1,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,fp8,3,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,float16,7,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,fp8,7,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,fp8,63,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,float16,15,0.017114666601022083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,fp8,15,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,float16,31,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,fp8,31,0.01684800038735072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,float16,63,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,float16,127,0.017125333348910015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,float16,1023,0.04422399898370107
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,fp8,127,0.01682666689157486
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,float16,255,0.01716800034046173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,fp8,255,0.01525866612792015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,float16,511,0.025072000920772552
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,float16,4095,0.14425599575042725
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,fp8,4095,0.08401067058245341
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,fp8,511,0.021040000021457672
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,fp8,1023,0.031199999153614044
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,float16,2047,0.08076799909273784
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,fp8,2047,0.052000001072883606
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,float16,1,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,float16,8191,0.2715199987093608
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,fp8,1,0.011071999867757162
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,24,24,128,0,1,float16,fp8,8191,0.14758933583895364
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,float16,15,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,float16,3,0.009733333562811216
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,fp8,3,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,float16,7,0.010522666076819101
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,fp8,7,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,fp8,15,0.009941333283980688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,float16,31,0.008986666798591614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,fp8,127,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,fp8,31,0.009045333291093508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,float16,63,0.01007466639081637
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,fp8,63,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,float16,127,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,float16,255,0.009994666402538618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,fp8,255,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,float16,511,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,fp8,511,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,float16,1023,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,fp8,1023,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,float16,8191,0.01926400015751521
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,float16,2047,0.01481066644191742
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,fp8,2047,0.014495999862750372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,float16,4095,0.016778666526079178
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,fp8,4095,0.014912000546852747
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,fp8,8191,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,fp8,1,0.009461333354314169
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,float16,16383,0.027056001126766205
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,fp8,16383,0.021061333517233532
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,float16,1,0.010629333555698395
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,float16,32767,0.04562133550643921
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,24,24,128,0,1,float16,fp8,32767,0.02957333376010259
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,float16,3,0.010938666760921478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,fp8,3,0.009077333534757296
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,float16,7,0.009082666908701261
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,fp8,7,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,float16,15,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,fp8,15,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,float16,31,0.010640000303586325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,fp8,31,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,float16,63,0.009039999917149544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,fp8,63,0.00966933307548364
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,float16,127,0.009381333366036415
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,fp8,127,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,float16,255,0.010629333555698395
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,float16,2047,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,fp8,2047,0.014864000181357065
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,fp8,255,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,float16,511,0.011066666493813196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,fp8,511,0.011061333119869232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,float16,1023,0.013007999708255133
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,fp8,1023,0.013088000317414602
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,float16,16383,0.04619200030962626
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,float16,4095,0.019130667050679524
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,fp8,4095,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,float16,8191,0.025781333446502686
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,fp8,8191,0.021007999777793884
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,fp8,16383,0.0312266672650973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,float16,1,0.024143998821576435
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,fp8,1,0.023050665855407715
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,float16,32767,0.07515733440717061
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,24,24,128,0,1,float16,fp8,32767,0.046800002455711365
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,fp8,15,0.022917332748572033
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,float16,3,0.02366400013367335
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,fp8,3,0.021375998854637146
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,float16,63,0.0240639994541804
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,float16,7,0.025018667181332905
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,fp8,7,0.022986667851607006
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,float16,15,0.025285333395004272
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,float16,31,0.02333866556485494
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,fp8,31,0.023168000082174938
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,fp8,63,0.021327999730904896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,fp8,511,0.031445334355036415
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,float16,127,0.023370665808518726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,fp8,127,0.02103466788927714
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,fp8,1023,0.047498668233553566
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,float16,255,0.02717866748571396
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,fp8,255,0.022618666291236877
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,float16,511,0.044549331068992615
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,float16,1023,0.07287999987602234
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,float16,2047,0.13607999682426453
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,float16,3,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,fp8,2047,0.08126399914423625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,float16,1,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,float16,4095,0.25512532393137616
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,float16,15,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,24,24,128,0,1,float16,fp8,4095,0.1397546629110972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,fp8,1,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,fp8,3,0.010181333248813948
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,float16,7,0.011071999867757162
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,fp8,7,0.010933333386977514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,fp8,15,0.012655999511480331
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,float16,31,0.010640000303586325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,fp8,31,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,float16,63,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,fp8,63,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,float16,127,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,fp8,127,0.010640000303586325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,float16,255,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,fp8,255,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,float16,511,0.011429333438475927
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,fp8,511,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,float16,1023,0.012970666090647379
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,fp8,4095,0.02325333406527837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,fp8,1023,0.012794667234023413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,float16,2047,0.018874666343132656
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,fp8,2047,0.018858666221300762
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,float16,4095,0.032560000816980995
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,float16,8191,0.05442133545875549
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,fp8,8191,0.03731200098991394
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,float16,16383,0.09286399682362874
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,fp8,16383,0.05796800057093302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,24,24,128,0,1,float16,float16,1,0.039434666434923805
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,float16,32767,0.1678559978802999
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,24,24,128,0,1,float16,fp8,32767,0.10097600022951762
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,24,24,128,0,1,float16,fp8,1,0.03538133452335993
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,24,24,128,0,1,float16,float16,3,0.039488000174363456
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,24,24,128,0,1,float16,fp8,3,0.03469866762558619
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,24,24,128,0,1,float16,float16,7,0.0395359992980957
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,24,24,128,0,1,float16,fp8,7,0.03363200028737386
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,24,24,128,0,1,float16,float16,63,0.03896533449490865
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,24,24,128,0,1,float16,float16,15,0.039642666776975
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,24,24,128,0,1,float16,fp8,15,0.03535466641187668
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,24,24,128,0,1,float16,float16,31,0.039450667798519135
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,24,24,128,0,1,float16,fp8,31,0.034287999073664345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,24,24,128,0,1,float16,fp8,63,0.03542399903138479
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,24,24,128,0,1,float16,float16,127,0.04091199984153112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,24,24,128,0,1,float16,fp8,127,0.033786666889985405
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,24,24,128,0,1,float16,float16,255,0.04355733096599579
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,24,24,128,0,1,float16,fp8,255,0.0351946676770846
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,24,24,128,0,1,float16,float16,511,0.07434133191903432
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,24,24,128,0,1,float16,fp8,511,0.05208000044027964
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,24,24,128,0,1,float16,float16,1023,0.12980799873669943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,24,24,128,0,1,float16,fp8,1023,0.08032533526420593
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,24,24,128,0,1,float16,float16,2047,0.24857600529988608
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,24,24,128,0,1,float16,fp8,2047,0.1402773360411326
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,24,24,128,0,1,float16,float16,1,0.07042666773001353
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,24,24,128,0,1,float16,fp8,1,0.05794133245944977
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,24,24,128,0,1,float16,float16,3,0.06971199810504913
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,24,24,128,0,1,float16,fp8,3,0.05816000203291575
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,24,24,128,0,1,float16,float16,31,0.06997333467006683
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,24,24,128,0,1,float16,float16,7,0.07004266480604808
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,24,24,128,0,1,float16,float16,63,0.06983466446399689
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,24,24,128,0,1,float16,fp8,7,0.05959466596444448
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,24,24,128,0,1,float16,float16,15,0.07043200234572093
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,24,24,128,0,1,float16,fp8,15,0.05789866546789805
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,24,24,128,0,1,float16,fp8,31,0.057904000083605446
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,24,24,128,0,1,float16,fp8,63,0.05793066819508871
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,24,24,128,0,1,float16,float16,127,0.0701279987891515
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,24,24,128,0,1,float16,fp8,127,0.06010133524735769
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,24,24,128,0,1,float16,float16,255,0.07272533575693767
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,24,24,128,0,1,float16,fp8,255,0.05987200140953064
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,24,24,128,0,1,float16,float16,511,0.1333013375600179
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,24,24,128,0,1,float16,fp8,511,0.0906933347384135
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,24,24,128,0,1,float16,float16,1,0.12957866986592612
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,24,24,128,0,1,float16,float16,7,0.12941333651542664
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,24,24,128,0,1,float16,fp8,1,0.10979732871055603
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,24,24,128,0,1,float16,float16,3,0.12950399518013
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,24,24,128,0,1,float16,fp8,3,0.10929066936175029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,24,24,128,0,1,float16,fp8,7,0.10967999696731567
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,24,24,128,0,1,float16,float16,15,0.12979732950528464
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,24,24,128,0,1,float16,fp8,15,0.10981866717338562
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,24,24,128,0,1,float16,fp8,63,0.11106666922569275
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,24,24,128,0,1,float16,float16,31,0.12923733393351236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,24,24,128,0,1,float16,float16,127,0.12759466965993246
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,24,24,128,0,1,float16,fp8,31,0.11103999614715576
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,24,24,128,0,1,float16,float16,63,0.1281599998474121
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,float16,1,0.01090666651725769
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,24,24,128,0,1,float16,fp8,127,0.10969066619873047
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,fp8,1,0.011424000064531961
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,24,24,128,0,1,float16,float16,255,0.13547733426094055
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,float16,15,0.011087999989589056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,24,24,128,0,1,float16,fp8,255,0.1083679993947347
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,float16,3,0.011066666493813196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,fp8,3,0.011685332904259363
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,float16,7,0.01138666644692421
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,fp8,7,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,fp8,15,0.011242666592200598
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,float16,31,0.013050666699806849
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,fp8,31,0.011109333485364914
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,float16,63,0.010954666882753372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,fp8,63,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,float16,127,0.012682666381200155
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,fp8,127,0.011120000233252844
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,fp8,1023,0.01643199970324834
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,float16,255,0.01109333336353302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,fp8,255,0.011482667177915573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,float16,511,0.014805333067973455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,fp8,4095,0.03737066686153412
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,fp8,511,0.013093333691358566
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,float16,1023,0.017637333522240322
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,float16,2047,0.031530665854612984
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,fp8,2047,0.024138666689395905
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,float16,4095,0.05643199880917867
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,float16,8191,0.09545600414276123
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,fp8,8191,0.060218666990598045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,fp8,16383,0.10292266805966695
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,fp8,32767,0.1872640053431193
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,float16,16383,0.17493333419164023
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,24,24,128,0,1,float16,float16,32767,0.334389328956604
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,24,24,128,0,1,float16,float16,1,0.24493332703908285
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,24,24,128,0,1,float16,fp8,1,0.20861866076787314
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,24,24,128,0,1,float16,float16,3,0.24490133921305338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,24,24,128,0,1,float16,fp8,3,0.20754667123158774
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,24,24,128,0,1,float16,float16,7,0.24471465746561685
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,24,24,128,0,1,float16,fp8,7,0.20844799280166626
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,24,24,128,0,1,float16,float16,15,0.24609599510828653
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,24,24,128,0,1,float16,fp8,15,0.2077173391977946
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,24,24,128,0,1,float16,fp8,31,0.2086720069249471
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,24,24,128,0,1,float16,float16,31,0.24491200844446817
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,24,24,128,0,1,float16,float16,63,0.24454933404922485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,24,24,128,0,1,float16,fp8,63,0.20848000049591064
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,24,24,128,0,1,float16,float16,127,0.24288000663121542
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,24,24,128,0,1,float16,fp8,127,0.2079306642214457
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,24,24,128,0,1,float16,float16,1,0.475930651028951
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,24,24,128,0,1,float16,fp8,1,0.4039093255996704
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,24,24,128,0,1,float16,float16,3,0.4758613506952922
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,24,24,128,0,1,float16,fp8,3,0.4039040009180705
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,24,24,128,0,1,float16,float16,7,0.47754132747650146
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,24,24,128,0,1,float16,fp8,7,0.4041386842727661
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,24,24,128,0,1,float16,float16,15,0.4769386847813924
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,float16,1,0.012896000097195307
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,fp8,1,0.012874666601419449
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,float16,3,0.012981332838535309
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,fp8,3,0.01302933320403099
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,24,24,128,0,1,float16,fp8,15,0.40377600987752277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,float16,7,0.014218666901191076
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,24,24,128,0,1,float16,float16,31,0.4759360154469808
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,fp8,7,0.013104000439246496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,24,24,128,0,1,float16,fp8,31,0.40352535247802734
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,float16,15,0.01313599944114685
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,fp8,15,0.012757333616415659
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,float16,63,0.013034666577974955
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,24,24,128,0,1,float16,float16,63,0.4761173327763875
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,24,24,128,0,1,float16,fp8,63,0.40465601285298664
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,float16,31,0.012885333349307379
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,float16,255,0.012986666212479273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,fp8,255,0.012944000462690989
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,fp8,511,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,fp8,31,0.012821332861979803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,fp8,63,0.012773333738247553
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,float16,127,0.013050666699806849
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,fp8,127,0.012778667112191519
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,float16,511,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,fp8,4095,0.050016000866889954
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,float16,1023,0.02510400116443634
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,float16,8191,0.14326933026313782
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,fp8,1023,0.019215999792019527
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,float16,2047,0.04771733283996582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,fp8,2047,0.03245333333810171
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,fp8,16383,0.14633066455523172
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,float16,4095,0.08037333190441132
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,fp8,8191,0.08216000099976857
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,float16,1,0.013061333447694778
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,24,24,128,0,1,float16,float16,16383,0.2698400020599365
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,fp8,1,0.014762666076421738
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,float16,3,0.013130666067202887
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,fp8,3,0.012879999975363413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,float16,7,0.014352000008026758
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,fp8,7,0.012981332838535309
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,float16,15,0.012917333592971167
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,float16,127,0.014949332922697067
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,fp8,15,0.013056000073750814
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,float16,31,0.013034666577974955
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,fp8,31,0.015029333531856537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,float16,63,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,fp8,63,0.01313599944114685
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,fp8,127,0.012890666723251343
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,float16,255,0.012901333471139273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,float16,2047,0.040021332601706185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,fp8,255,0.014794666320085526
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,float16,511,0.016821333517630894
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,fp8,511,0.017093333105246227
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,float16,1023,0.023178666830062866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,fp8,1023,0.0210506667693456
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,fp8,2047,0.033370666205883026
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,float16,4095,0.06338133414586385
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,fp8,4095,0.05601066847642263
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,float16,8191,0.10910933216412862
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,fp8,8191,0.09435733159383138
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,fp8,3,0.009685333197315535
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,float16,1,0.009103999783595404
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,float16,16383,0.1981066664059957
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,fp8,1,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,64,0,1,float16,fp8,16383,0.17057067155838013
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,float16,31,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,float16,3,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,float16,7,0.010933333386977514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,fp8,7,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,float16,15,0.00897066667675972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,fp8,15,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,fp8,31,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,fp8,255,0.010656000425418219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,fp8,63,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,float16,63,0.00922133338948091
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,float16,127,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,fp8,127,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,float16,255,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,float16,511,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,fp8,511,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,float16,1023,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,fp8,4095,0.01481066644191742
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,fp8,1023,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,float16,2047,0.012885333349307379
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,fp8,2047,0.012746666868527731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,float16,4095,0.013194666554530462
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,float16,8191,0.01623999948302905
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,fp8,8191,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,float16,16383,0.01926400015751521
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,fp8,16383,0.017093333105246227
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,float16,32767,0.02349333216746648
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,fp8,32767,0.02178666740655899
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,float16,1,0.010597333312034607
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,fp8,1,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,float16,3,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,fp8,3,0.010480000327030817
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,float16,65535,0.037258667250474296
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,64,0,1,float16,fp8,65535,0.031290667752424874
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,float16,7,0.009103999783595404
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,fp8,7,0.009109333157539368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,float16,15,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,fp8,15,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,float16,31,0.008901333436369896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,fp8,31,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,float16,63,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,fp8,63,0.009882666791478792
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,float16,127,0.008922666932145754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,fp8,127,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,float16,255,0.008943999807039896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,fp8,255,0.00978133330742518
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,float16,2047,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,float16,511,0.01101333275437355
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,fp8,511,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,float16,1023,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,float16,8191,0.01714133347074191
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,fp8,1023,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,fp8,2047,0.01209066684047381
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,float16,4095,0.013365333278973898
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,fp8,4095,0.013157332936922709
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,fp8,8191,0.015226667126019796
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,float16,16383,0.02312533309062322
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,fp8,16383,0.02111999938885371
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,float16,32767,0.03754133234421412
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,float16,1,0.017173333714405697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,fp8,32767,0.02957333376010259
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,float16,7,0.01828266680240631
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,fp8,1,0.01701333373785019
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,float16,3,0.01798933371901512
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,float16,65535,0.06069866816202799
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,fp8,3,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,fp8,31,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,64,0,1,float16,fp8,65535,0.051669334371884666
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,fp8,7,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,float16,15,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,fp8,15,0.01714666684468587
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,float16,31,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,float16,63,0.018976000448067982
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,fp8,63,0.017664000391960144
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,float16,127,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,fp8,127,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,float16,255,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,fp8,255,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,float16,511,0.02333866556485494
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,fp8,511,0.021125334004561108
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,float16,4095,0.0988159974416097
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,float16,1023,0.03777066618204117
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,fp8,1023,0.031285333136717476
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,float16,1,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,float16,2047,0.06022400160630544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,fp8,2047,0.05373866856098175
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,fp8,4095,0.08649599552154541
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,fp8,1,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,float16,8191,0.1768266757329305
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,64,0,1,float16,fp8,8191,0.15246933698654175
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,float16,3,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,fp8,3,0.01062400018175443
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,float16,7,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,fp8,7,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,float16,15,0.00892800030608972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,fp8,15,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,float16,31,0.010608000059922537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,fp8,31,0.010053333515922228
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,float16,63,0.01055466632048289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,fp8,63,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,fp8,511,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,float16,127,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,fp8,127,0.010597333312034607
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,float16,2047,0.014922666052977243
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,float16,255,0.008997333546479544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,fp8,255,0.010645333677530289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,float16,511,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,float16,1023,0.01110400011142095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,fp8,1023,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,fp8,2047,0.014965333044528961
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,float16,4095,0.01716800034046173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,fp8,4095,0.017050666113694508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,float16,8191,0.023728000621000927
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,fp8,8191,0.021386665602525074
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,float16,16383,0.040991999208927155
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,fp8,16383,0.031514666974544525
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,float16,32767,0.06389333307743073
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,fp8,32767,0.05406933526198069
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,float16,1,0.027221334477265675
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,fp8,1,0.025392000873883564
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,float16,65535,0.10885333021481831
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,float16,3,0.0264533335963885
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,64,0,1,float16,fp8,65535,0.09118400017420451
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,fp8,3,0.025306666890780132
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,float16,7,0.025914666553338368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,fp8,7,0.02536533276240031
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,float16,15,0.02593066543340683
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,fp8,15,0.02515733242034912
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,float16,127,0.02735999971628189
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,float16,31,0.026416001220544178
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,fp8,31,0.02515200028816859
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,float16,63,0.027109332382678986
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,fp8,63,0.02518933266401291
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,fp8,127,0.025018667181332905
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,float16,255,0.025311999022960663
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,fp8,255,0.025008000433444977
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,fp8,1023,0.053344001372655235
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,float16,511,0.03941333293914795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,fp8,511,0.03501333296298981
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,float16,1023,0.059903999169667564
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,float16,2047,0.10291733344395955
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,float16,4095,0.1809013287226359
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,fp8,4095,0.1561973293622335
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,64,0,1,float16,fp8,2047,0.09046399593353271
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,16,64,0,1,float16,fp8,3,0.039781334499518074
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,16,64,0,1,float16,float16,7,0.04346133271853129
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,16,64,0,1,float16,float16,1,0.04347200194994608
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,16,64,0,1,float16,fp8,1,0.03977599988381068
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,16,64,0,1,float16,float16,3,0.04355733096599579
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,16,64,0,1,float16,fp8,7,0.0397173340121905
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,16,64,0,1,float16,float16,15,0.043562665581703186
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,16,64,0,1,float16,fp8,15,0.039664000272750854
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,16,64,0,1,float16,float16,31,0.04351999859015147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,16,64,0,1,float16,fp8,31,0.04081066697835922
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,16,64,0,1,float16,float16,63,0.043562665581703186
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,16,64,0,1,float16,fp8,63,0.039834665755430855
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,16,64,0,1,float16,float16,127,0.04358933369318644
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,16,64,0,1,float16,fp8,127,0.03977066775163015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,16,64,0,1,float16,float16,255,0.04418133199214935
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,16,64,0,1,float16,fp8,255,0.03961066653331121
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,16,64,0,1,float16,fp8,1023,0.09282132983207703
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,16,64,0,1,float16,float16,511,0.06771199901898702
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,16,64,0,1,float16,float16,1,0.07754133145014445
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,16,64,0,1,float16,float16,3,0.07836266855398814
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,16,64,0,1,float16,fp8,511,0.060175999999046326
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,16,64,0,1,float16,float16,1023,0.1070240040620168
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,16,64,0,1,float16,fp8,1,0.07196266452471416
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,16,64,0,1,float16,fp8,3,0.07228800157705943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,16,64,0,1,float16,float16,7,0.07743466893831889
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,16,64,0,1,float16,fp8,7,0.07222400108973186
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,16,64,0,1,float16,fp8,31,0.07226666808128357
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,16,64,0,1,float16,float16,15,0.07815999786059062
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,16,64,0,1,float16,fp8,15,0.07009066641330719
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,16,64,0,1,float16,float16,31,0.07783466577529907
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,16,64,0,1,float16,fp8,127,0.072202667593956
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,16,64,0,1,float16,float16,63,0.07828266421953838
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,16,64,0,1,float16,float16,255,0.07854400078455608
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,16,64,0,1,float16,fp8,63,0.07228266696135204
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,16,64,0,1,float16,float16,127,0.07880533238252004
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,16,64,0,1,float16,fp8,255,0.07270933190981548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,float16,1,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,fp8,1,0.0124746672809124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,16,64,0,1,float16,float16,511,0.12262933452924092
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,float16,3,0.01090666651725769
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,16,64,0,1,float16,fp8,511,0.10916800300280254
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,fp8,3,0.010629333555698395
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,float16,7,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,fp8,7,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,float16,15,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,fp8,15,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,float16,31,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,fp8,31,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,float16,63,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,fp8,63,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,float16,127,0.011098666737476984
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,fp8,127,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,float16,255,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,fp8,255,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,float16,511,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,fp8,511,0.01109333336353302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,float16,1023,0.012757333616415659
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,fp8,1023,0.012842666357755661
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,float16,2047,0.017279999951521557
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,fp8,2047,0.016805333395799
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,float16,4095,0.023306667804718018
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,fp8,4095,0.021685334543387096
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,float16,8191,0.039690665900707245
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,float16,32767,0.10693333546320598
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,fp8,8191,0.03127466638882955
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,float16,16383,0.061893333991368614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,fp8,16383,0.05373333394527435
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,fp8,32767,0.09088533123334248
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,float16,65535,0.19605867067972818
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,16,64,0,1,float16,float16,1,0.14802666505177817
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,16,64,0,1,float16,fp8,1,0.13366400202115378
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,16,64,0,1,float16,fp8,3,0.133733332157135
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,64,0,1,float16,fp8,65535,0.16927999258041382
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,16,64,0,1,float16,float16,7,0.14822399616241455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,16,64,0,1,float16,float16,3,0.14813333749771118
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,16,64,0,1,float16,fp8,7,0.13352533181508383
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,16,64,0,1,float16,float16,15,0.14802666505177817
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,16,64,0,1,float16,fp8,15,0.1336373289426168
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,16,64,0,1,float16,float16,31,0.14802133043607077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,16,64,0,1,float16,float16,63,0.1472640037536621
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,16,64,0,1,float16,fp8,31,0.13434132933616638
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,16,64,0,1,float16,float16,127,0.14631999532381693
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,16,64,0,1,float16,fp8,127,0.13384000460306802
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,16,64,0,1,float16,fp8,63,0.13435733318328857
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,16,64,0,1,float16,float16,255,0.14391466975212097
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,16,64,0,1,float16,fp8,255,0.1316266655921936
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,16,64,0,1,float16,float16,1,0.2833919922510783
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,16,64,0,1,float16,float16,3,0.283242662747701
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,16,64,0,1,float16,fp8,3,0.26055999596913654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,16,64,0,1,float16,fp8,7,0.26048000653584796
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,16,64,0,1,float16,fp8,1,0.26151466369628906
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,16,64,0,1,float16,float16,7,0.2839786609013875
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,16,64,0,1,float16,fp8,15,0.26020266612370807
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,16,64,0,1,float16,float16,31,0.2833813428878784
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,16,64,0,1,float16,fp8,31,0.2605866591135661
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,16,64,0,1,float16,float16,63,0.28339733680089313
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,float16,1,0.011343999455372492
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,fp8,1,0.011168000598748526
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,float16,3,0.01110400011142095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,fp8,3,0.011658667276302973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,float16,7,0.012058666596810022
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,fp8,7,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,fp8,15,0.011359999577204386
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,float16,15,0.011205332974592844
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,fp8,31,0.011157333850860596
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,float16,31,0.011519999553759893
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,16,64,0,1,float16,fp8,63,0.2616373300552368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,float16,63,0.01108266661564509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,16,64,0,1,float16,fp8,127,0.2586453358332316
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,float16,255,0.011061333119869232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,fp8,63,0.011509332805871964
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,float16,127,0.01137599969903628
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,fp8,511,0.012826666235923767
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,fp8,127,0.012730666746695837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,16,64,0,1,float16,float16,127,0.28169065713882446
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,fp8,255,0.0116799995303154
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,float16,511,0.012970666090647379
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,float16,1023,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,fp8,1023,0.015909332782030106
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,float16,2047,0.023520000278949738
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,fp8,2047,0.023183998962243397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,float16,4095,0.039493332306543984
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,fp8,4095,0.03182400017976761
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,float16,8191,0.06191466748714447
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,fp8,8191,0.054010664423306785
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,float16,16383,0.10736533006032307
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,fp8,16383,0.09382399916648865
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,float16,1,0.015642666568358738
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,fp8,1,0.013104000439246496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,fp8,32767,0.17068266868591309
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,float16,3,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,64,0,1,float16,float16,32767,0.19709867238998413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,fp8,15,0.01320533330241839
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,fp8,3,0.013290667285521826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,fp8,31,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,float16,7,0.014922666052977243
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,fp8,7,0.013114667187134424
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,float16,15,0.014736000448465347
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,float16,31,0.014997333288192749
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,16,64,0,1,float16,float16,15,0.28330133358637494
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,float16,63,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,fp8,63,0.012981332838535309
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,fp8,511,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,float16,127,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,fp8,127,0.013072000195582708
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,float16,255,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,fp8,255,0.013973332941532135
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,float16,511,0.01889066646496455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,float16,1023,0.033402666449546814
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,fp8,1023,0.022416000564893086
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,float16,2047,0.05947199960549673
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,fp8,2047,0.039749334255854286
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,float16,4095,0.10121066371599834
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,fp8,4095,0.06206933160622915
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,float16,8191,0.18659732739130655
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,fp8,8191,0.10513066252072652
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,float16,1,0.009141333401203156
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,fp8,1,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,fp8,16383,0.18965333700180054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,float16,3,0.009957333405812582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,16,128,0,1,float16,float16,16383,0.35524264971415204
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,fp8,3,0.009098666409651438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,float16,7,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,float16,63,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,fp8,7,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,float16,127,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,float16,15,0.009103999783595404
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,fp8,15,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,float16,31,0.008954666554927826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,fp8,31,0.01098666712641716
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,fp8,63,0.009850666547815004
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,fp8,127,0.010970667004585266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,float16,255,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,fp8,255,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,float16,511,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,fp8,511,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,float16,1023,0.010901333143313726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,fp8,1023,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,float16,2047,0.013408000270525614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,fp8,2047,0.013088000317414602
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,float16,4095,0.015237333873907724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,float16,16383,0.021301334102948506
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,fp8,4095,0.014837333311637243
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,float16,8191,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,float16,32767,0.035904000202814736
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,fp8,8191,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,fp8,16383,0.01929066702723503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,fp8,32767,0.023247999449570973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,float16,1,0.011621333658695221
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,fp8,1,0.009754666437705358
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,float16,3,0.009029333169261614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,float16,65535,0.05584000051021576
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,fp8,3,0.011071999867757162
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,float16,7,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,16,128,0,1,float16,fp8,65535,0.03676799933115641
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,fp8,7,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,float16,15,0.009130666653315226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,fp8,15,0.010911999891201654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,float16,31,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,fp8,31,0.011120000233252844
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,float16,63,0.009125333279371262
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,fp8,63,0.009866666669646898
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,float16,127,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,fp8,127,0.009685333197315535
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,float16,255,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,fp8,255,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,float16,2047,0.01309866706530253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,float16,511,0.01126933346192042
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,fp8,511,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,float16,1023,0.011445333560307821
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,fp8,1023,0.01121066634853681
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,fp8,2047,0.012965332716703415
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,float16,4095,0.01581866666674614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,fp8,4095,0.01479999969402949
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,float16,8191,0.02072000006834666
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,fp8,8191,0.017269333203633625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,float16,16383,0.03568000098069509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,fp8,16383,0.023061332603295643
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,fp8,32767,0.03745066622893015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,float16,32767,0.05758399764696757
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,float16,1,0.019578666736682255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,fp8,1,0.017610666652520496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,float16,3,0.019386666516462963
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,float16,65535,0.09873066345850627
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,fp8,15,0.017845333864291508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,16,128,0,1,float16,fp8,65535,0.058650667468706764
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,fp8,3,0.017242666333913803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,float16,7,0.019141333798567455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,fp8,7,0.0170666662355264
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,float16,15,0.01950399950146675
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,float16,31,0.01912533367673556
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,fp8,31,0.01729600007335345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,float16,63,0.01929066702723503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,fp8,63,0.017050666113694508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,float16,127,0.019248000035683315
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,fp8,127,0.017231999586025875
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,float16,255,0.019002666076024372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,fp8,255,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,float16,2047,0.09579199552536011
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,float16,511,0.03348266581694285
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,fp8,511,0.02295999974012375
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,float16,4095,0.17218667268753052
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,float16,1023,0.05269333223501841
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,fp8,1023,0.03573333223660787
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,fp8,2047,0.05819199979305267
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,fp8,4095,0.09514133135477702
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,float16,1,0.01101333275437355
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,fp8,1,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,float16,8191,0.32707200447718304
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,float16,3,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,16,128,0,1,float16,fp8,8191,0.17106133699417114
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,fp8,3,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,float16,7,0.009813333551088968
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,fp8,7,0.010992000500361124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,float16,15,0.00961599995692571
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,fp8,15,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,float16,31,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,fp8,31,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,float16,63,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,fp8,63,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,float16,127,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,float16,1023,0.012698666503032049
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,fp8,1023,0.013077333569526672
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,fp8,127,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,float16,255,0.009194666519761086
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,float16,4095,0.019082666685183842
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,fp8,255,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,float16,8191,0.037589333951473236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,float16,511,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,fp8,511,0.011109333485364914
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,float16,2047,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,float16,16383,0.061343997716903687
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,fp8,2047,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,fp8,4095,0.01926933353145917
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,fp8,8191,0.023376000424226124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,fp8,16383,0.04075733323891958
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,float16,32767,0.10094933708508809
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,fp8,32767,0.06229333579540253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,float16,1,0.030042665700117748
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,fp8,1,0.0252960001428922
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,float16,7,0.029530666768550873
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,float16,65535,0.18585066000620523
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,float16,3,0.029696000119050343
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,fp8,15,0.0260959987839063
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,fp8,3,0.02514133354028066
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,float16,31,0.03032533327738444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,16,128,0,1,float16,fp8,65535,0.10306666294733684
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,fp8,7,0.02588266630967458
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,float16,15,0.029482667644818623
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,fp8,31,0.025392000873883564
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,float16,63,0.029215998947620392
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,float16,255,0.03365866591533025
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,fp8,63,0.025407999753952026
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,float16,127,0.029333333174387615
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,fp8,127,0.025424001117547352
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,fp8,255,0.025311999022960663
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,float16,511,0.053914666175842285
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,float16,2047,0.1725920041402181
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,fp8,511,0.03973866750796636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,fp8,1023,0.05789333085219065
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,float16,1023,0.09170132875442505
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,fp8,2047,0.09915733337402344
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,16,128,0,1,float16,fp8,1,0.041877334316571556
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,float16,4095,0.32570133606592816
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,16,128,0,1,float16,float16,3,0.04939199984073639
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,16,128,0,1,float16,fp8,7,0.04186666508515676
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,16,128,0,1,float16,float16,1,0.04837866624196371
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,16,128,0,1,float16,fp8,4095,0.17466666301091513
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,16,128,0,1,float16,fp8,3,0.04223999877770742
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,16,128,0,1,float16,float16,7,0.047925333182017006
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,16,128,0,1,float16,float16,15,0.04806933303674062
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,16,128,0,1,float16,fp8,15,0.041989331444104515
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,16,128,0,1,float16,float16,31,0.04776533444722494
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,16,128,0,1,float16,fp8,31,0.04182399809360504
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,16,128,0,1,float16,float16,63,0.04934399823347727
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,16,128,0,1,float16,fp8,63,0.04174399872620901
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,16,128,0,1,float16,float16,511,0.09299733241399129
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,16,128,0,1,float16,float16,127,0.05065066615740458
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,16,128,0,1,float16,fp8,127,0.04230933388074239
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,16,128,0,1,float16,float16,255,0.05269333223501841
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,16,128,0,1,float16,fp8,255,0.04365866879622141
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,16,128,0,1,float16,fp8,511,0.06439466774463654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,16,128,0,1,float16,float16,1023,0.17043733596801758
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,16,128,0,1,float16,float16,1,0.08895466725031535
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,16,128,0,1,float16,fp8,1023,0.10241599877675374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,16,128,0,1,float16,fp8,1,0.07630933324495952
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,16,128,0,1,float16,float16,3,0.0892693301041921
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,16,128,0,1,float16,float16,7,0.08897067109743755
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,16,128,0,1,float16,fp8,3,0.07632533212502797
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,16,128,0,1,float16,fp8,7,0.07630933324495952
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,16,128,0,1,float16,float16,15,0.08919999996821086
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,16,128,0,1,float16,fp8,15,0.07472533484299977
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,16,128,0,1,float16,fp8,63,0.07625066737333934
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,16,128,0,1,float16,float16,31,0.08865066369374593
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,16,128,0,1,float16,float16,127,0.08888000249862671
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,16,128,0,1,float16,fp8,31,0.0763733337322871
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,16,128,0,1,float16,float16,63,0.08927999933560689
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,16,128,0,1,float16,fp8,127,0.0765066643555959
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,16,128,0,1,float16,float16,511,0.17309333880742392
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,16,128,0,1,float16,fp8,511,0.11731200416882832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,16,128,0,1,float16,float16,255,0.09482133388519287
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,float16,1,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,16,128,0,1,float16,fp8,255,0.07634133100509644
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,fp8,1,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,float16,3,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,fp8,3,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,float16,7,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,fp8,7,0.011125333607196808
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,float16,15,0.011114666859308878
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,fp8,15,0.011061333119869232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,float16,31,0.011136000355084738
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,fp8,31,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,float16,63,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,fp8,63,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,float16,127,0.010656000425418219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,fp8,127,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,float16,255,0.011071999867757162
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,fp8,255,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,float16,511,0.012805332740147909
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,fp8,511,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,float16,1023,0.013232000172138214
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,fp8,1023,0.012938667088747025
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,float16,2047,0.019088000059127808
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,fp8,2047,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,float16,4095,0.037461332976818085
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,fp8,4095,0.023285334308942158
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,float16,8191,0.05856533348560333
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,fp8,8191,0.03808533400297165
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,float16,16383,0.10046399633089702
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,fp8,32767,0.10217600067456563
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,fp8,16383,0.05884266893068949
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,float16,32767,0.18204265832901
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,16,128,0,1,float16,float16,1,0.16643200318018594
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,16,128,0,1,float16,fp8,1,0.1418453355630239
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,float16,65535,0.3498613437016805
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,16,128,0,1,float16,fp8,65535,0.187391996383667
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,16,128,0,1,float16,fp8,3,0.1420906682809194
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,16,128,0,1,float16,float16,7,0.16671466827392578
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,16,128,0,1,float16,float16,3,0.16924800475438437
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,16,128,0,1,float16,fp8,7,0.14191466569900513
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,16,128,0,1,float16,float16,15,0.16658133268356323
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,16,128,0,1,float16,fp8,31,0.14193066954612732
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,16,128,0,1,float16,fp8,15,0.14308266838391623
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,16,128,0,1,float16,float16,63,0.16667733589808145
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,16,128,0,1,float16,float16,31,0.16699200868606567
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,16,128,0,1,float16,float16,127,0.16473066806793213
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,16,128,0,1,float16,fp8,63,0.14196800192197165
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,16,128,0,1,float16,fp8,127,0.14197867115338644
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,16,128,0,1,float16,float16,255,0.17561600605646768
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,16,128,0,1,float16,fp8,255,0.14113600055376688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,16,128,0,1,float16,float16,1,0.3220799962679545
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,16,128,0,1,float16,float16,3,0.3221120039621989
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,16,128,0,1,float16,float16,7,0.32215466101964313
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,16,128,0,1,float16,fp8,7,0.27483733495076496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,16,128,0,1,float16,fp8,1,0.2758400042851766
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,16,128,0,1,float16,fp8,3,0.27502934137980145
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,16,128,0,1,float16,fp8,15,0.2749546567598979
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,16,128,0,1,float16,float16,31,0.3220319946606954
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,16,128,0,1,float16,float16,15,0.32386666536331177
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,16,128,0,1,float16,fp8,31,0.27500800291697186
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,float16,1,0.01108266661564509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,16,128,0,1,float16,float16,63,0.32236266136169434
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,fp8,1,0.011365332951148352
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,fp8,3,0.011173332730929056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,float16,3,0.0129120002190272
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,float16,7,0.012319999436537424
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,fp8,7,0.011503999431928
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,float16,15,0.01179733375708262
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,fp8,31,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,fp8,15,0.01191466674208641
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,fp8,63,0.011183999478816986
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,float16,31,0.012917333592971167
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,16,128,0,1,float16,fp8,127,0.2740746736526489
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,float16,63,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,16,128,0,1,float16,fp8,63,0.2746346592903137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,float16,127,0.012885333349307379
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,fp8,127,0.011066666493813196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,float16,255,0.011066666493813196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,16,128,0,1,float16,float16,127,0.31802666187286377
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,fp8,255,0.01098666712641716
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,float16,511,0.014266667266686758
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,fp8,511,0.012975999464591345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,float16,1023,0.017845333864291508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,fp8,1023,0.01684800038735072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,float16,2047,0.039461334546407066
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,fp8,2047,0.02514133354028066
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,float16,4095,0.05849599838256836
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,fp8,4095,0.03973866750796636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,float16,8191,0.10121066371599834
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,fp8,8191,0.06062933305899302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,float16,16383,0.18500800927480063
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,fp8,16383,0.10431999961535136
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,float16,1,0.013178666432698568
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,float16,7,0.013125333935022354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,fp8,1,0.013232000172138214
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,float16,3,0.012762666990359625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,float16,32767,0.3559199968973796
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,fp8,3,0.01303999995191892
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,16,128,0,1,float16,fp8,32767,0.18875734011332193
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,fp8,7,0.013061333447694778
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,float16,15,0.013541333377361298
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,float16,127,0.013408000270525614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,fp8,15,0.01313599944114685
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,float16,31,0.012789333860079447
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,fp8,31,0.012896000097195307
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,float16,63,0.013242666920026144
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,fp8,63,0.012778667112191519
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,fp8,127,0.013088000317414602
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,fp8,1023,0.019205333044131596
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,float16,255,0.012879999975363413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,fp8,255,0.012794667234023413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,float16,511,0.015247999380032221
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,fp8,511,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,float16,1023,0.019167999426523846
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,float16,2047,0.03357866654793421
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,fp8,2047,0.027471999327341717
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,float16,4095,0.0513919989267985
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,fp8,4095,0.04524800181388855
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,fp8,1,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,float16,3,0.008837333569924036
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,float16,8191,0.08619733651479085
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,fp8,8191,0.07415999968846639
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,float16,1,0.009002666920423508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,float16,16383,0.15203733245531717
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,64,0,1,float16,fp8,16383,0.1316213309764862
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,fp8,3,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,float16,7,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,fp8,7,0.009029333169261614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,float16,15,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,fp8,15,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,float16,31,0.008997333546479544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,fp8,31,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,float16,63,0.009125333279371262
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,fp8,63,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,float16,127,0.009072000160813332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,fp8,127,0.00914666677514712
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,float16,255,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,fp8,255,0.010645333677530289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,float16,511,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,fp8,511,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,float16,4095,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,fp8,4095,0.013839999834696451
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,float16,1023,0.011178666104873022
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,fp8,1023,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,float16,2047,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,fp8,2047,0.011120000233252844
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,float16,8191,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,fp8,8191,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,float16,16383,0.01706133286158244
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,fp8,16383,0.01728533332546552
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,float16,32767,0.021002667645613354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,fp8,32767,0.01923199991385142
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,float16,1,0.010469333579142889
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,fp8,1,0.012069333344697952
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,float16,3,0.009093333035707474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,fp8,3,0.009093333035707474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,float16,65535,0.031311998764673867
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,64,0,1,float16,fp8,65535,0.025946666797002155
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,float16,7,0.009173333023985228
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,fp8,7,0.008949333180983862
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,float16,15,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,fp8,15,0.010144000252087912
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,float16,31,0.009077333534757296
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,fp8,31,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,float16,63,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,fp8,63,0.008874666566650072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,float16,127,0.00897066667675972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,fp8,127,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,float16,255,0.009509333098928133
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,fp8,255,0.009109333157539368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,float16,511,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,fp8,511,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,float16,1023,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,fp8,1023,0.010911999891201654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,float16,2047,0.012981332838535309
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,fp8,2047,0.014826666563749313
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,float16,16383,0.021055998901526134
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,float16,4095,0.014874666929244995
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,fp8,4095,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,float16,32767,0.031290667752424874
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,float16,8191,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,fp8,8191,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,fp8,16383,0.019237333287795384
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,fp8,32767,0.025418666501839954
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,fp8,3,0.016506666938463848
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,float16,1,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,fp8,7,0.015024000157912573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,float16,7,0.015930666277805965
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,fp8,15,0.016197333733240765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,fp8,1,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,float16,3,0.01562133307258288
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,float16,65535,0.048298666874567665
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,64,0,1,float16,fp8,65535,0.04154133299986521
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,float16,15,0.017125333348910015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,float16,31,0.015263999501864115
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,float16,255,0.01682666689157486
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,fp8,31,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,float16,63,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,fp8,63,0.014938666174809137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,float16,127,0.015568000574906668
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,fp8,127,0.01591466615597407
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,fp8,255,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,float16,511,0.020928000410397846
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,fp8,511,0.021386665602525074
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,float16,1023,0.03183466692765554
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,fp8,1023,0.027434666951497395
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,float16,2047,0.051701332132021584
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,fp8,2047,0.046122665206591286
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,float16,4095,0.08593599994977315
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,fp8,4095,0.0748586654663086
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,float16,1,0.011242666592200598
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,float16,8191,0.15408000349998474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,fp8,1,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,64,0,1,float16,fp8,8191,0.13359999656677246
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,float16,3,0.009039999917149544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,fp8,3,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,float16,7,0.009119999905427298
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,fp8,7,0.009722666814923286
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,float16,15,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,fp8,15,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,float16,31,0.008997333546479544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,fp8,31,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,float16,63,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,fp8,63,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,float16,127,0.008992000172535578
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,fp8,127,0.009349333122372627
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,float16,255,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,fp8,255,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,float16,511,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,fp8,511,0.011130666981140772
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,float16,1023,0.01310933381319046
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,fp8,1023,0.013157332936922709
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,float16,2047,0.014975999792416891
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,fp8,2047,0.014970666418472925
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,float16,4095,0.019013332823912304
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,fp8,4095,0.01716800034046173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,float16,8191,0.021370666722456615
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,fp8,8191,0.02089600016673406
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,float16,16383,0.033957332372665405
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,fp8,16383,0.027290667096773785
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,float16,32767,0.04953599969546
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,fp8,32767,0.044069334864616394
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,float16,1,0.023013333479563396
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,float16,3,0.023189333577950794
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,fp8,1,0.021840001145998638
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,float16,65535,0.08063999811808269
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,float16,15,0.023103999594847362
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,fp8,3,0.021295999487241108
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,64,0,1,float16,fp8,65535,0.06820799907048543
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,fp8,7,0.02145066608985265
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,float16,7,0.023610666394233704
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,fp8,15,0.021930667261282604
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,float16,31,0.021690666675567627
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,fp8,31,0.021354667842388153
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,float16,63,0.021685334543387096
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,fp8,63,0.021322667598724365
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,float16,127,0.023077333966890972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,fp8,127,0.02143999934196472
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,float16,255,0.021733333667119343
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,float16,1023,0.04976533353328705
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,fp8,255,0.0210506667693456
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,float16,511,0.033333333830038704
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,fp8,511,0.02842666705449422
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,fp8,1023,0.044213334719340004
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,float16,2047,0.08569066723187764
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,fp8,2047,0.0749013324578603
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,float16,4095,0.14612266421318054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,12,12,64,0,1,float16,float16,1,0.03549333413441976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,64,0,1,float16,fp8,4095,0.12609066565831503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,12,12,64,0,1,float16,fp8,1,0.03326933334271113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,12,12,64,0,1,float16,float16,3,0.03562133262554804
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,12,12,64,0,1,float16,float16,15,0.03536533315976461
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,12,12,64,0,1,float16,float16,7,0.03549333413441976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,12,12,64,0,1,float16,fp8,3,0.03397866586844126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,12,12,64,0,1,float16,fp8,7,0.03331200033426285
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,12,12,64,0,1,float16,float16,63,0.0355679988861084
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,12,12,64,0,1,float16,fp8,15,0.03253866732120514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,12,12,64,0,1,float16,float16,31,0.0356480007370313
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,12,12,64,0,1,float16,fp8,31,0.03328000009059906
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,12,12,64,0,1,float16,fp8,63,0.033589333295822144
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,12,12,64,0,1,float16,fp8,255,0.03160000095764796
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,12,12,64,0,1,float16,fp8,127,0.03330666571855545
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,12,12,64,0,1,float16,float16,127,0.035946667194366455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,12,12,64,0,1,float16,float16,255,0.036288000643253326
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,12,12,64,0,1,float16,float16,511,0.05397333204746246
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,12,12,64,0,1,float16,fp8,511,0.04763199885686239
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,12,12,64,0,1,float16,float16,1023,0.08529067039489746
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,12,12,64,0,1,float16,float16,1,0.060645331939061485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,12,12,64,0,1,float16,fp8,1023,0.07341866691907246
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,12,12,64,0,1,float16,fp8,3,0.055914665261904396
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,12,12,64,0,1,float16,float16,7,0.060831998785336815
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,12,12,64,0,1,float16,float16,15,0.061797335743904114
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,12,12,64,0,1,float16,fp8,1,0.057914664347966514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,12,12,64,0,1,float16,float16,3,0.06038400034109751
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,12,12,64,0,1,float16,fp8,7,0.05611733098824819
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,12,12,64,0,1,float16,fp8,15,0.056186666091283165
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,12,12,64,0,1,float16,float16,31,0.061119998494784035
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,12,12,64,0,1,float16,fp8,31,0.05611733098824819
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,12,12,64,0,1,float16,float16,63,0.06009600063165029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,12,12,64,0,1,float16,fp8,63,0.056176001826922096
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,12,12,64,0,1,float16,float16,127,0.06258666515350342
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,12,12,64,0,1,float16,float16,255,0.061749334136645
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,12,12,64,0,1,float16,fp8,127,0.05590933561325073
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,12,12,64,0,1,float16,fp8,511,0.08481066425641377
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,12,12,64,0,1,float16,fp8,255,0.05704000095526377
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,float16,1,0.009317333499590555
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,fp8,7,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,12,12,64,0,1,float16,float16,511,0.09649067123730977
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,fp8,1,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,float16,3,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,fp8,3,0.010389333590865135
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,float16,7,0.010992000500361124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,float16,15,0.009098666409651438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,fp8,15,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,float16,31,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,fp8,31,0.011391999820868174
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,float16,63,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,float16,511,0.011114666859308878
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,fp8,63,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,float16,127,0.008997333546479544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,fp8,127,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,float16,2047,0.017045332739750545
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,float16,255,0.009189333145817121
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,fp8,255,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,fp8,511,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,float16,1023,0.012847999731699625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,fp8,1023,0.012810666114091873
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,fp8,2047,0.01629866659641266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,float16,16383,0.061162665486335754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,float16,4095,0.023706667125225067
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,fp8,4095,0.021290667355060577
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,float16,8191,0.0354720006386439
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,float16,32767,0.10498666763305664
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,fp8,8191,0.03148266673088074
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,fp8,16383,0.05364799996217092
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,fp8,32767,0.09062400460243225
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,float16,65535,0.19520533084869385
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,12,12,64,0,1,float16,float16,1,0.11335466305414836
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,12,12,64,0,1,float16,fp8,1,0.10259200135866801
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,12,12,64,0,1,float16,float16,3,0.11353600025177002
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,12,12,64,0,1,float16,float16,7,0.11351999640464783
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,64,0,1,float16,fp8,65535,0.16883200407028198
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,12,12,64,0,1,float16,fp8,3,0.10283733407656352
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,12,12,64,0,1,float16,float16,15,0.11319999893506368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,12,12,64,0,1,float16,fp8,15,0.10291199882825215
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,12,12,64,0,1,float16,fp8,7,0.10507733623186748
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,12,12,64,0,1,float16,float16,31,0.11335466305414836
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,12,12,64,0,1,float16,fp8,31,0.10292266805966695
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,12,12,64,0,1,float16,float16,127,0.11250666777292888
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,12,12,64,0,1,float16,fp8,63,0.10301333665847778
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,12,12,64,0,1,float16,fp8,127,0.10326932867368062
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,12,12,64,0,1,float16,float16,63,0.11451733112335205
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,12,12,64,0,1,float16,fp8,255,0.10201066732406616
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,12,12,64,0,1,float16,float16,255,0.11148266990979512
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,12,12,64,0,1,float16,float16,1,0.21649066607157388
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,12,12,64,0,1,float16,fp8,1,0.19922133286794028
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,12,12,64,0,1,float16,fp8,3,0.1992853283882141
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,12,12,64,0,1,float16,fp8,7,0.19923200209935507
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,12,12,64,0,1,float16,float16,3,0.216703991095225
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,12,12,64,0,1,float16,fp8,15,0.19917333126068115
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,12,12,64,0,1,float16,float16,7,0.21655466159184775
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,12,12,64,0,1,float16,float16,31,0.21576533714930216
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,12,12,64,0,1,float16,float16,15,0.21665066480636597
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,12,12,64,0,1,float16,fp8,31,0.1990506649017334
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,float16,1,0.011354666203260422
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,fp8,1,0.01091733326514562
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,float16,3,0.011391999820868174
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,fp8,3,0.011077333241701126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,12,12,64,0,1,float16,float16,63,0.2169546683629354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,float16,7,0.011247999966144562
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,fp8,7,0.01139733319481214
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,12,12,64,0,1,float16,fp8,127,0.1982240080833435
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,float16,15,0.011365332951148352
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,fp8,15,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,12,12,64,0,1,float16,fp8,63,0.1992853283882141
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,float16,31,0.011087999989589056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,12,12,64,0,1,float16,float16,127,0.215503990650177
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,fp8,31,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,float16,63,0.01101333275437355
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,fp8,63,0.011071999867757162
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,float16,127,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,fp8,127,0.01110400011142095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,float16,255,0.011168000598748526
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,fp8,255,0.011205332974592844
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,float16,511,0.012837332983811697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,fp8,511,0.01310933381319046
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,float16,1023,0.01621333385507266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,fp8,1023,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,float16,2047,0.023039999107519787
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,fp8,2047,0.023024000227451324
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,float16,16383,0.10734933614730835
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,float16,4095,0.037477334340413414
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,fp8,16383,0.09264000256856282
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,fp8,4095,0.03127466638882955
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,fp8,1,0.013253333667914072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,float16,32767,0.1967466672261556
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,float16,8191,0.06197333335876465
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,fp8,32767,0.16846400499343872
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,64,0,1,float16,fp8,8191,0.05414933462937673
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,float16,1,0.01339200014869372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,fp8,15,0.012960000584522883
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,float16,3,0.013162666310866674
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,fp8,31,0.01310933381319046
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,fp8,3,0.012810666114091873
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,float16,7,0.01322666679819425
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,fp8,7,0.012949333836634954
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,float16,15,0.012890666723251343
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,float16,31,0.01341333364446958
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,float16,63,0.01320533330241839
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,fp8,63,0.013056000073750814
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,float16,127,0.013269333789745966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,fp8,127,0.012858666479587555
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,fp8,1023,0.019199999670187633
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,float16,255,0.013477332890033722
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,fp8,255,0.0129120002190272
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,fp8,2047,0.033226666351159416
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,float16,511,0.017103999853134155
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,fp8,511,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,float16,1023,0.025253333151340485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,float16,2047,0.04882133503754934
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,float16,4095,0.08067200084527333
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,fp8,4095,0.05133333305517832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,fp8,8191,0.08257066706816356
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,float16,8191,0.14452266693115234
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,float16,1,0.009039999917149544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,float16,16383,0.27112533648808795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,fp8,1,0.009514666472872099
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,12,12,128,0,1,float16,fp8,16383,0.14652799566586813
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,float16,3,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,fp8,3,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,fp8,31,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,float16,7,0.008938666433095932
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,fp8,7,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,fp8,63,0.011274666835864386
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,float16,15,0.00915733352303505
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,fp8,127,0.010618666807810465
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,fp8,15,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,float16,31,0.009018666421373686
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,float16,63,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,float16,127,0.008922666932145754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,float16,255,0.011285333583752314
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,fp8,1023,0.010954666882753372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,fp8,255,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,float16,511,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,float16,4095,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,fp8,4095,0.014783999572197596
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,fp8,511,0.011226666470368704
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,fp8,8191,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,float16,1023,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,float16,16383,0.01934933289885521
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,float16,2047,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,fp8,2047,0.011306667079528173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,float16,8191,0.016837333639462788
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,fp8,16383,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,float16,32767,0.027109332382678986
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,fp8,32767,0.021344001094500225
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,float16,1,0.009898666913310686
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,fp8,1,0.00915733352303505
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,float16,3,0.01091733326514562
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,fp8,65535,0.031290667752424874
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,fp8,3,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,12,12,128,0,1,float16,float16,65535,0.04655466477076212
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,float16,7,0.009018666421373686
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,fp8,7,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,float16,15,0.010351999973257383
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,fp8,15,0.009125333279371262
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,float16,31,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,fp8,31,0.010981333752473196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,float16,63,0.009984000275532404
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,fp8,63,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,float16,127,0.008965333302815756
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,fp8,127,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,float16,255,0.010026666646202406
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,fp8,255,0.009008000294367472
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,float16,511,0.011136000355084738
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,float16,4095,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,fp8,511,0.010911999891201654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,float16,1023,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,fp8,1023,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,float16,2047,0.01482133318980535
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,fp8,2047,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,fp8,4095,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,float16,8191,0.01930133377512296
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,fp8,8191,0.017312000195185345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,float16,16383,0.026687999566396076
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,fp8,16383,0.021205333371957142
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,float16,32767,0.04699199895064036
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,float16,65535,0.07593066493670146
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,fp8,32767,0.03152533372243246
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,float16,1,0.017786666750907898
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,fp8,1,0.015610666324694952
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,float16,3,0.017258666455745697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,fp8,3,0.01579733317097028
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,float16,7,0.017258666455745697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,12,12,128,0,1,float16,fp8,65535,0.047557334105173744
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,fp8,7,0.015237333873907724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,float16,15,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,fp8,15,0.015893333901961643
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,float16,31,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,fp8,31,0.016751999656359356
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,float16,63,0.017194667210181553
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,fp8,63,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,float16,127,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,fp8,127,0.015040000279744467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,float16,255,0.01714133347074191
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,fp8,255,0.01540800059835116
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,float16,511,0.02644266684850057
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,fp8,511,0.021221332252025604
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,float16,1023,0.04417066772778829
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,fp8,1023,0.0313226655125618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,float16,2047,0.08044266700744629
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,fp8,2047,0.0517493337392807
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,float16,4095,0.14454933007558188
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,fp8,4095,0.08353066444396973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,float16,1,0.011157333850860596
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,fp8,1,0.009125333279371262
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,fp8,8191,0.14840533336003622
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,float16,15,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,12,12,128,0,1,float16,float16,8191,0.2722933292388916
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,float16,3,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,fp8,3,0.009045333291093508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,float16,7,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,fp8,7,0.01089599976936976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,fp8,15,0.009765333185593287
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,float16,31,0.009685333197315535
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,fp8,31,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,float16,63,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,fp8,63,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,float16,127,0.00961599995692571
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,fp8,127,0.010608000059922537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,float16,255,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,fp8,255,0.009152000149091085
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,float16,511,0.01097600037852923
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,fp8,511,0.011141333729028702
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,float16,1023,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,fp8,1023,0.013104000439246496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,float16,2047,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,fp8,2047,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,float16,4095,0.019317333896954853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,fp8,4095,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,float16,8191,0.028927999238173168
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,fp8,8191,0.021381333470344543
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,float16,16383,0.04946133494377136
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,fp8,16383,0.0308693324526151
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,fp8,32767,0.047600001096725464
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,float16,32767,0.07870399951934814
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,float16,1,0.026314665873845417
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,fp8,1,0.023168000082174938
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,float16,7,0.025040000677108765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,float16,3,0.02516266703605652
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,float16,65535,0.13756266236305237
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,fp8,3,0.021301334102948506
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,12,12,128,0,1,float16,fp8,65535,0.07867733140786488
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,fp8,7,0.02126399924357732
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,float16,15,0.023957334458827972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,fp8,15,0.021984001000722248
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,float16,31,0.025114665428797405
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,fp8,31,0.023039999107519787
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,float16,63,0.025231999655564625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,fp8,63,0.021386665602525074
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,float16,127,0.023951999843120575
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,fp8,127,0.02146133283774058
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,float16,255,0.0272533322374026
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,float16,1023,0.07283733288447063
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,fp8,255,0.022181332111358643
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,float16,511,0.04371733466784159
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,fp8,511,0.03253866732120514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,fp8,1023,0.04790933430194855
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,float16,2047,0.13805333773295084
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,fp8,4095,0.14005333185195923
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,fp8,2047,0.08147733410199483
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,12,12,128,0,1,float16,float16,4095,0.2557973265647888
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,12,12,128,0,1,float16,float16,1,0.03939733405907949
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,12,12,128,0,1,float16,fp8,1,0.035429333647092186
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,12,12,128,0,1,float16,float16,3,0.039674667020638786
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,12,12,128,0,1,float16,fp8,3,0.035530666510264076
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,12,12,128,0,1,float16,float16,7,0.03957866628964742
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,12,12,128,0,1,float16,fp8,7,0.03537066777547201
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,12,12,128,0,1,float16,float16,15,0.03958933303753535
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,12,12,128,0,1,float16,fp8,15,0.03365866591533025
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,12,12,128,0,1,float16,float16,127,0.040250666439533234
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,12,12,128,0,1,float16,float16,31,0.03937600056330363
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,12,12,128,0,1,float16,fp8,31,0.035455999275048576
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,12,12,128,0,1,float16,float16,63,0.039488000174363456
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,12,12,128,0,1,float16,fp8,63,0.03535466641187668
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,12,12,128,0,1,float16,fp8,127,0.03480533262093862
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,12,12,128,0,1,float16,float16,255,0.04363733530044556
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,12,12,128,0,1,float16,fp8,255,0.036042665441830955
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,12,12,128,0,1,float16,float16,511,0.07317866881688435
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,12,12,128,0,1,float16,float16,1,0.07036800185839336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,12,12,128,0,1,float16,fp8,511,0.05170666674772898
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,12,12,128,0,1,float16,float16,1023,0.13033066193262735
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,12,12,128,0,1,float16,fp8,1023,0.08186133205890656
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,12,12,128,0,1,float16,fp8,1,0.0588266650835673
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,12,12,128,0,1,float16,float16,3,0.07019733389218648
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,12,12,128,0,1,float16,fp8,3,0.05858133236567179
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,12,12,128,0,1,float16,fp8,15,0.05995733539263407
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,12,12,128,0,1,float16,fp8,7,0.05910933514436086
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,12,12,128,0,1,float16,float16,7,0.0699893335501353
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,12,12,128,0,1,float16,float16,15,0.06979733208815257
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,12,12,128,0,1,float16,fp8,63,0.05852800110975901
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,12,12,128,0,1,float16,float16,31,0.07081066568692525
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,12,12,128,0,1,float16,fp8,31,0.05824000140031179
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,12,12,128,0,1,float16,float16,63,0.07009600102901459
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,12,12,128,0,1,float16,float16,255,0.07431999842325847
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,12,12,128,0,1,float16,float16,127,0.07017600039641063
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,12,12,128,0,1,float16,fp8,127,0.06019733349482218
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,12,12,128,0,1,float16,fp8,511,0.09097599983215332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,12,12,128,0,1,float16,fp8,255,0.059903999169667564
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,float16,1,0.010357333347201347
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,fp8,7,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,12,12,128,0,1,float16,float16,511,0.13223466277122498
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,fp8,1,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,float16,3,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,fp8,3,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,float16,7,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,float16,15,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,fp8,15,0.010885333021481832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,float16,31,0.010298666854699453
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,fp8,31,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,float16,63,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,fp8,63,0.01091733326514562
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,float16,127,0.010330666477481524
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,fp8,127,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,float16,255,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,fp8,255,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,float16,511,0.010922666639089584
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,fp8,511,0.010954666882753372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,float16,1023,0.012986666212479273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,fp8,1023,0.012784000486135483
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,float16,2047,0.019018666197856266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,fp8,2047,0.01724799970785777
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,float16,4095,0.030821333328882854
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,fp8,4095,0.023221333821614582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,float16,8191,0.05412800113360087
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,fp8,8191,0.035989334185918175
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,float16,16383,0.09305066863695781
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,fp8,16383,0.05789866546789805
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,float16,32767,0.1678933302561442
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,fp8,32767,0.10084266463915507
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,12,12,128,0,1,float16,float16,1,0.12961066762606302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,float16,65535,0.31939733028411865
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,12,12,128,0,1,float16,float16,3,0.12955199678738913
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,12,12,128,0,1,float16,fp8,65535,0.18682666619618735
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,12,12,128,0,1,float16,fp8,3,0.11115733782450359
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,12,12,128,0,1,float16,fp8,1,0.11124266187349956
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,12,12,128,0,1,float16,float16,7,0.12917866309483847
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,12,12,128,0,1,float16,fp8,7,0.11103999614715576
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,12,12,128,0,1,float16,float16,15,0.1293280025323232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,12,12,128,0,1,float16,fp8,15,0.11137066284815471
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,12,12,128,0,1,float16,fp8,31,0.11114133397738139
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,12,12,128,0,1,float16,fp8,63,0.11134933431943257
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,12,12,128,0,1,float16,float16,31,0.13012799620628357
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,12,12,128,0,1,float16,float16,127,0.12776000301043192
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,12,12,128,0,1,float16,float16,63,0.13062399625778198
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,12,12,128,0,1,float16,fp8,127,0.11115200320879619
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,12,12,128,0,1,float16,float16,255,0.1355893313884735
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,12,12,128,0,1,float16,fp8,255,0.1091306706269582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,12,12,128,0,1,float16,float16,1,0.24452267090479532
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,12,12,128,0,1,float16,float16,3,0.24422933657964072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,12,12,128,0,1,float16,float16,7,0.24425599972407022
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,12,12,128,0,1,float16,fp8,7,0.2095359961191813
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,12,12,128,0,1,float16,float16,15,0.24428266286849976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,12,12,128,0,1,float16,fp8,1,0.21052267154057822
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,12,12,128,0,1,float16,fp8,3,0.2107200026512146
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,12,12,128,0,1,float16,fp8,15,0.2093706727027893
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,float16,1,0.01302933320403099
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,12,12,128,0,1,float16,float16,31,0.24425599972407022
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,fp8,1,0.011141333729028702
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,12,12,128,0,1,float16,fp8,31,0.2095306714375814
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,12,12,128,0,1,float16,float16,63,0.24444266160329184
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,float16,7,0.011717333147923151
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,float16,3,0.011312000453472137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,fp8,3,0.011152000476916632
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,12,12,128,0,1,float16,fp8,63,0.2086026668548584
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,fp8,7,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,float16,15,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,fp8,15,0.010965333630641302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,float16,31,0.011130666981140772
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,float16,127,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,12,12,128,0,1,float16,float16,127,0.24218666553497314
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,12,12,128,0,1,float16,fp8,127,0.20917866627375284
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,fp8,31,0.011130666981140772
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,float16,63,0.011061333119869232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,fp8,63,0.011509332805871964
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,fp8,127,0.01126933346192042
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,float16,255,0.01101333275437355
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,float16,2047,0.03203733265399933
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,fp8,255,0.01089599976936976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,float16,511,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,fp8,511,0.012805332740147909
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,float16,1023,0.017242666333913803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,fp8,1023,0.016789333273967106
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,fp8,2047,0.02364266663789749
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,float16,4095,0.05603733162085215
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,fp8,4095,0.03579733272393545
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,float16,8191,0.09507200121879578
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,fp8,8191,0.059893334905306496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,float16,16383,0.1751733422279358
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,fp8,16383,0.10228266318639119
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,float16,1,0.011125333607196808
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,fp8,1,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,float16,3,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,float16,32767,0.3333760102589925
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,fp8,15,0.01090666651725769
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,float16,31,0.010928000013033548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,fp8,3,0.011440000186363855
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,float16,63,0.011183999478816986
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,12,12,128,0,1,float16,fp8,32767,0.18902933597564697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,float16,7,0.01110400011142095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,fp8,7,0.012778667112191519
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,float16,255,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,float16,15,0.011109333485364914
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,fp8,31,0.012479999413092932
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,fp8,63,0.010890666395425797
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,float16,127,0.011247999966144562
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,fp8,127,0.011525332927703857
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,fp8,255,0.010970667004585266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,fp8,2047,0.022416000564893086
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,float16,511,0.013199999928474426
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,fp8,511,0.01328533391157786
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,float16,1023,0.01580799991885821
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,float16,8191,0.062122667829195656
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,fp8,8191,0.05572799841562907
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,fp8,1023,0.015520000209410986
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,float16,2047,0.02347733328739802
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,float16,16383,0.10684800148010254
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,float16,4095,0.04137066751718521
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,fp8,4095,0.03165333221356074
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,fp8,16383,0.09287466605504353
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,float16,1,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,fp8,1,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,float16,3,0.009039999917149544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,fp8,3,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,fp8,32767,0.16866666078567505
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,float16,7,0.009018666421373686
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,64,0,1,float16,float16,32767,0.19610132773717245
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,fp8,7,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,float16,15,0.00901333304742972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,fp8,15,0.009994666402538618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,float16,127,0.01089599976936976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,float16,31,0.010901333143313726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,fp8,31,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,float16,63,0.009237333511312803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,fp8,63,0.009232000137368837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,fp8,127,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,float16,255,0.00891733355820179
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,fp8,1023,0.011237333218256632
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,fp8,255,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,float16,511,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,fp8,511,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,float16,1023,0.009898666913310686
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,float16,2047,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,fp8,2047,0.01097600037852923
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,float16,4095,0.013242666920026144
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,fp8,16383,0.016410666207472484
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,fp8,4095,0.013034666577974955
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,float16,8191,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,fp8,8191,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,float16,16383,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,float16,32767,0.01970133309563001
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,fp8,32767,0.019461333751678467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,float16,65535,0.025402667621771496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,float16,1,0.009925333162148794
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,fp8,1,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,float16,3,0.010944000134865442
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,fp8,3,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,fp8,65535,0.023007998863856
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,float16,7,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,fp8,7,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,float16,31,0.008890666688481966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,float16,15,0.009119999905427298
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,fp8,15,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,float16,131071,0.041562666495641075
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,fp8,31,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,64,0,1,float16,fp8,131071,0.03349866718053818
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,float16,63,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,fp8,63,0.009749333063761393
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,float16,127,0.00927466650803884
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,fp8,127,0.008986666798591614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,float16,255,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,fp8,255,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,float16,511,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,float16,4095,0.01534933348496755
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,fp8,511,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,float16,1023,0.010384000216921171
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,fp8,1023,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,float16,2047,0.012928000340859095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,fp8,2047,0.012853333105643591
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,fp8,4095,0.014560000350077948
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,float16,8191,0.01685333376129468
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,fp8,8191,0.016832000265518825
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,float16,16383,0.01922133316596349
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,fp8,16383,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,float16,32767,0.025125332176685333
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,fp8,32767,0.02126399924357732
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,float16,65535,0.038405333956082664
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,float16,1,0.013130666067202887
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,fp8,1,0.012863999853531519
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,float16,7,0.013194666554530462
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,fp8,65535,0.03054400036732356
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,float16,3,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,fp8,3,0.01331199953953425
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,float16,31,0.01309866706530253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,fp8,31,0.0138026662170887
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,fp8,7,0.01320533330241839
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,float16,15,0.013088000317414602
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,fp8,15,0.013130666067202887
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,float16,131071,0.0621973325808843
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,float16,255,0.013541333377361298
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,float16,63,0.013295999417702356
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,fp8,63,0.012917333592971167
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,64,0,1,float16,fp8,131071,0.05341866612434387
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,float16,127,0.01301866645614306
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,fp8,1023,0.021029333273569744
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,fp8,127,0.013738666971524557
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,fp8,255,0.012885333349307379
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,float16,511,0.017418666432301205
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,float16,4095,0.06418133278687795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,fp8,511,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,float16,1023,0.022986667851607006
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,float16,2047,0.04102933406829834
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,fp8,2047,0.033514666060606636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,fp8,4095,0.05681066711743673
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,float16,8191,0.10778133074442546
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,fp8,8191,0.09478400150934856
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,float16,1,0.009119999905427298
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,fp8,1,0.010890666395425797
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,float16,3,0.009813333551088968
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,float16,16383,0.1978613336881002
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,fp8,3,0.011077333241701126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,64,0,1,float16,fp8,16383,0.17049066225687662
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,float16,7,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,fp8,7,0.009061333412925402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,float16,63,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,float16,15,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,fp8,15,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,float16,31,0.009125333279371262
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,fp8,31,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,fp8,63,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,float16,127,0.00890666681031386
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,fp8,127,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,float16,255,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,fp8,255,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,float16,511,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,fp8,511,0.010981333752473196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,float16,1023,0.011098666737476984
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,fp8,1023,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,float16,8191,0.018031999468803406
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,float16,2047,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,fp8,2047,0.013167999684810638
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,float16,4095,0.015029333531856537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,fp8,4095,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,fp8,8191,0.017237332959969837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,float16,16383,0.023413332800070446
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,fp8,16383,0.021397332350413006
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,float16,32767,0.0403413325548172
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,fp8,32767,0.031930667658646904
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,float16,1,0.01714133347074191
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,float16,65535,0.06387199958165486
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,fp8,65535,0.05388799806435903
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,float16,3,0.017162666966517765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,fp8,1,0.017071999609470367
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,fp8,3,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,float16,7,0.018789333601792652
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,fp8,15,0.01714133347074191
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,fp8,7,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,float16,15,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,float16,131071,0.11161067088445027
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,float16,31,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,fp8,31,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,float16,63,0.01730666682124138
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,fp8,63,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,64,0,1,float16,fp8,131071,0.0939466655254364
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,float16,127,0.017173333714405697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,fp8,511,0.02310933421055476
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,fp8,127,0.01695999999841054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,float16,255,0.017423999806245167
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,fp8,255,0.017136000096797943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,float16,511,0.022869333624839783
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,float16,1023,0.036144000788529716
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,fp8,1023,0.03123733401298523
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,float16,2047,0.06051200131575266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,fp8,2047,0.053770666321118675
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,float16,4095,0.09921066959698994
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,fp8,4095,0.08683733145395915
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,8,64,0,1,float16,float16,1,0.02533866713444392
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,8,64,0,1,float16,fp8,1,0.025253333151340485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,8,64,0,1,float16,float16,7,0.027141332626342773
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,8,64,0,1,float16,float16,3,0.02722666660944621
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,float16,8191,0.17774933576583862
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,64,0,1,float16,fp8,8191,0.15316266814867655
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,8,64,0,1,float16,fp8,3,0.025248001019159954
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,8,64,0,1,float16,fp8,7,0.025402667621771496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,8,64,0,1,float16,float16,15,0.02733866622050603
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,8,64,0,1,float16,fp8,15,0.0249493345618248
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,8,64,0,1,float16,float16,31,0.02717866748571396
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,8,64,0,1,float16,fp8,31,0.025386666258176167
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,8,64,0,1,float16,float16,63,0.025439999997615814
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,8,64,0,1,float16,fp8,63,0.025413334369659424
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,8,64,0,1,float16,float16,127,0.025786665578683216
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,8,64,0,1,float16,fp8,127,0.025472000241279602
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,8,64,0,1,float16,float16,255,0.027056001126766205
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,8,64,0,1,float16,fp8,255,0.024703999360402424
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,8,64,0,1,float16,float16,511,0.040405333042144775
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,8,64,0,1,float16,fp8,511,0.03526933242877325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,8,64,0,1,float16,float16,1023,0.06000000238418579
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,8,64,0,1,float16,fp8,1023,0.053786665201187134
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,8,64,0,1,float16,float16,2047,0.10316266616185506
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,8,64,0,1,float16,float16,1,0.04394133388996124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,8,64,0,1,float16,fp8,3,0.04164266586303711
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,8,64,0,1,float16,fp8,2047,0.0909493366877238
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,8,64,0,1,float16,fp8,1,0.03972800076007843
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,8,64,0,1,float16,float16,3,0.043893332282702126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,8,64,0,1,float16,float16,7,0.043653334180514015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,8,64,0,1,float16,fp8,7,0.041519999504089355
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,8,64,0,1,float16,float16,15,0.04398933549722036
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,8,64,0,1,float16,fp8,15,0.041365332901477814
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,8,64,0,1,float16,float16,31,0.04364266494909922
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,8,64,0,1,float16,fp8,63,0.04141333450873693
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,8,64,0,1,float16,fp8,31,0.040661332507928215
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,8,64,0,1,float16,float16,255,0.045466666420300804
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,8,64,0,1,float16,float16,63,0.04358933369318644
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,8,64,0,1,float16,float16,127,0.043509334325790405
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,8,64,0,1,float16,fp8,127,0.0406986673672994
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,8,64,0,1,float16,fp8,255,0.039701332648595176
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,8,64,0,1,float16,float16,511,0.06737599770228068
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,8,64,0,1,float16,fp8,511,0.060778667529424034
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,float16,1,0.00926399976015091
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,float16,7,0.010128000130256018
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,fp8,1,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,float16,3,0.008896000062425932
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,8,64,0,1,float16,float16,1023,0.10685867071151733
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,8,64,0,1,float16,fp8,1023,0.09477866689364116
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,fp8,3,0.009258666386206945
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,fp8,31,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,fp8,7,0.01108266661564509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,float16,15,0.008943999807039896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,fp8,15,0.010464000205198923
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,float16,31,0.009045333291093508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,float16,63,0.010928000013033548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,fp8,63,0.010586666564146677
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,float16,127,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,fp8,127,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,float16,255,0.00890666681031386
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,fp8,255,0.009461333354314169
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,float16,2047,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,float16,511,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,fp8,511,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,float16,1023,0.011130666981140772
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,fp8,1023,0.012842666357755661
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,fp8,2047,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,float16,4095,0.017632000148296356
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,fp8,4095,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,float16,8191,0.02333866556485494
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,fp8,8191,0.02253866692384084
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,float16,16383,0.04045866678158442
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,fp8,16383,0.032229334115982056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,fp8,32767,0.053802669048309326
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,float16,32767,0.063701331615448
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,fp8,65535,0.09087466200192769
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,float16,65535,0.10949333508809407
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,8,64,0,1,float16,fp8,1,0.07221866647402446
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,8,64,0,1,float16,float16,1,0.07870399951934814
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,8,64,0,1,float16,float16,3,0.07895466685295105
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,8,64,0,1,float16,fp8,3,0.07285866638024648
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,8,64,0,1,float16,float16,7,0.07828799883524577
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,8,64,0,1,float16,fp8,7,0.07225599884986877
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,float16,131071,0.205402672290802
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,64,0,1,float16,fp8,131071,0.16876266400019327
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,8,64,0,1,float16,float16,15,0.0784693310658137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,8,64,0,1,float16,float16,31,0.07727999985218048
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,8,64,0,1,float16,fp8,31,0.07052800059318542
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,8,64,0,1,float16,fp8,15,0.07261866827805837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,8,64,0,1,float16,float16,63,0.07839466631412506
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,8,64,0,1,float16,fp8,63,0.07171733180681865
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,8,64,0,1,float16,fp8,127,0.07215466598669688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,8,64,0,1,float16,float16,127,0.07865599791208903
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,8,64,0,1,float16,fp8,255,0.07076266904671986
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,8,64,0,1,float16,float16,255,0.07868800063927968
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,8,64,0,1,float16,float16,511,0.1216319998105367
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,8,64,0,1,float16,float16,1,0.14659200112024942
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,8,64,0,1,float16,fp8,511,0.10942932963371277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,8,64,0,1,float16,fp8,1,0.13371200362841287
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,8,64,0,1,float16,float16,3,0.14800533652305603
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,8,64,0,1,float16,fp8,3,0.13351466258366904
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,8,64,0,1,float16,float16,7,0.1478559970855713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,8,64,0,1,float16,float16,15,0.14638933539390564
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,8,64,0,1,float16,fp8,15,0.13365333278973898
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,8,64,0,1,float16,fp8,7,0.1339413324991862
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,8,64,0,1,float16,fp8,31,0.13366400202115378
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,8,64,0,1,float16,float16,63,0.14793066183725992
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,8,64,0,1,float16,fp8,63,0.13458133737246195
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,float16,1,0.009813333551088968
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,8,64,0,1,float16,float16,127,0.1464959979057312
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,fp8,1,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,float16,3,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,8,64,0,1,float16,float16,31,0.14666666587193808
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,fp8,3,0.010970667004585266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,8,64,0,1,float16,fp8,127,0.1340000033378601
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,float16,7,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,fp8,7,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,float16,15,0.010928000013033548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,float16,31,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,fp8,15,0.011098666737476984
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,8,64,0,1,float16,float16,255,0.14402666687965393
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,fp8,31,0.01097600037852923
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,float16,255,0.009658666948477427
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,float16,63,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,fp8,63,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,8,64,0,1,float16,fp8,255,0.13156267007191977
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,float16,127,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,fp8,127,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,fp8,255,0.010522666076819101
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,float16,511,0.011098666737476984
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,fp8,511,0.01098666712641716
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,float16,1023,0.012831999609867731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,fp8,1023,0.012357333054145178
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,float16,2047,0.01727466657757759
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,fp8,2047,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,float16,4095,0.023157333334287006
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,fp8,4095,0.022783999641736347
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,float16,8191,0.039450667798519135
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,fp8,8191,0.031285333136717476
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,float16,16383,0.0618453323841095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,fp8,16383,0.053786665201187134
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,float16,32767,0.1069493293762207
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,float16,1,0.012719999998807907
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,fp8,1,0.011338666081428528
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,fp8,32767,0.09249599774678548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,float16,3,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,fp8,3,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,float16,7,0.011765333513418833
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,fp8,7,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,float16,65535,0.19697066148122153
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,fp8,31,0.01097600037852923
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,64,0,1,float16,fp8,65535,0.1683839956919352
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,float16,15,0.012837332983811697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,fp8,15,0.011242666592200598
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,fp8,127,0.012154666086037954
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,float16,31,0.01116266722480456
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,float16,63,0.01180800050497055
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,fp8,63,0.01173866664369901
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,float16,127,0.012847999731699625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,float16,1023,0.017279999951521557
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,float16,255,0.012730666746695837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,fp8,255,0.011312000453472137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,float16,511,0.015040000279744467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,fp8,511,0.013104000439246496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,fp8,1023,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,float16,2047,0.03876800090074539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,float16,8191,0.1018933355808258
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,fp8,2047,0.02478933334350586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,float16,4095,0.0598826656738917
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,fp8,4095,0.039962666730086006
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,fp8,8191,0.061306665341059365
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,float16,16383,0.18626133600870767
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,fp8,16383,0.10557333628336589
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,float16,1,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,fp8,1,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,float16,3,0.010901333143313726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,fp8,3,0.010901333143313726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,float16,32767,0.3547946612040202
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,float16,7,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,fp8,7,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,8,128,0,1,float16,fp8,32767,0.18913066387176514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,float16,15,0.010575999816258749
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,fp8,15,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,float16,31,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,fp8,31,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,float16,63,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,fp8,63,0.011066666493813196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,float16,127,0.008789333204428354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,fp8,127,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,float16,255,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,fp8,255,0.009072000160813332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,float16,511,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,fp8,511,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,fp8,4095,0.013194666554530462
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,float16,1023,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,fp8,1023,0.011077333241701126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,float16,2047,0.011055999745925268
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,fp8,2047,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,float16,4095,0.015050667027632395
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,float16,8191,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,fp8,32767,0.019215999792019527
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,fp8,8191,0.015674666812022526
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,float16,16383,0.018992000569899876
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,fp8,16383,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,float16,32767,0.023050665855407715
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,float16,1,0.009119999905427298
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,fp8,1,0.00890666681031386
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,float16,65535,0.03910933434963226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,fp8,65535,0.02534399926662445
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,float16,3,0.010559999694426855
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,fp8,3,0.010597333312034607
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,float16,7,0.009077333534757296
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,fp8,7,0.009338666374484697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,float16,63,0.00919999989370505
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,float16,15,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,fp8,63,0.009002666920423508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,fp8,15,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,float16,31,0.011109333485364914
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,fp8,131071,0.041178666055202484
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,8,128,0,1,float16,float16,131071,0.06029333174228668
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,fp8,31,0.010421333213647207
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,float16,127,0.009039999917149544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,float16,1023,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,fp8,127,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,float16,255,0.008789333204428354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,fp8,255,0.009786666681369146
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,float16,511,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,fp8,511,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,fp8,1023,0.010954666882753372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,float16,2047,0.013194666554530462
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,fp8,2047,0.012938667088747025
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,float16,4095,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,fp8,4095,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,float16,8191,0.017194667210181553
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,fp8,8191,0.017103999853134155
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,float16,16383,0.023029332359631855
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,fp8,16383,0.019274666905403137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,float16,32767,0.03805333375930786
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,fp8,32767,0.023200000325838726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,float16,65535,0.057850668827692665
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,fp8,1,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,float16,1,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,float16,7,0.014815999815861383
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,fp8,65535,0.03875733415285746
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,float16,3,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,fp8,3,0.013914667069911957
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,fp8,7,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,fp8,15,0.013088000317414602
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,float16,15,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,float16,131071,0.09883733590443929
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,float16,31,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,fp8,31,0.014266667266686758
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,8,128,0,1,float16,fp8,131071,0.06047466893990835
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,float16,63,0.014912000546852747
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,fp8,63,0.01312000056107839
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,float16,127,0.01482133318980535
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,fp8,511,0.01725333308180173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,fp8,127,0.013072000195582708
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,float16,255,0.01482133318980535
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,fp8,255,0.013946666071812311
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,float16,511,0.019296000401178997
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,float16,1023,0.03348266581694285
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,fp8,1023,0.021664001047611237
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,fp8,4095,0.06201600035031637
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,float16,2047,0.059903999169667564
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,fp8,2047,0.03934400031963984
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,float16,8191,0.18838399648666382
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,float16,4095,0.10211732983589172
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,fp8,8191,0.10486400127410889
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,float16,1,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,fp8,1,0.0099093330403169
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,float16,3,0.011594666788975397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,fp8,3,0.009770666559537252
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,float16,7,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,fp8,7,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,fp8,16383,0.1893440087636312
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,8,128,0,1,float16,float16,16383,0.35473068555196124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,float16,15,0.00955200009047985
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,fp8,15,0.010944000134865442
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,float16,31,0.009109333157539368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,fp8,31,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,float16,63,0.010069333637754122
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,fp8,63,0.00973866693675518
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,float16,127,0.009423999736706415
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,fp8,127,0.009941333283980688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,float16,255,0.01089599976936976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,float16,2047,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,fp8,255,0.00901333304742972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,float16,511,0.011109333485364914
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,fp8,511,0.01089599976936976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,float16,1023,0.011087999989589056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,fp8,1023,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,fp8,2047,0.01498666654030482
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,float16,4095,0.017504000415404636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,fp8,4095,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,float16,8191,0.021013334393501282
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,float16,32767,0.059765333930651345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,fp8,8191,0.01863466699918111
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,float16,16383,0.03749333322048187
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,fp8,16383,0.023434666295846302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,fp8,32767,0.039333333571751915
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,float16,1,0.01903466631968816
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,fp8,1,0.017525333911180496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,float16,3,0.01897066707412402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,fp8,3,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,float16,7,0.01922133316596349
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,float16,15,0.019253333409627277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,float16,65535,0.102101335922877
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,fp8,7,0.017573333034912746
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,fp8,65535,0.06025066475073496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,fp8,15,0.01748266691962878
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,float16,31,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,fp8,131071,0.10709333419799805
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,fp8,31,0.018272000054518383
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,float16,255,0.018885333091020584
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,float16,63,0.019253333409627277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,8,128,0,1,float16,float16,131071,0.18796267112096152
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,fp8,63,0.017562666287024815
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,float16,127,0.019215999792019527
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,fp8,127,0.017050666113694508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,fp8,255,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,float16,511,0.033285332222779594
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,fp8,511,0.023370665808518726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,float16,1023,0.05211733281612396
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,float16,4095,0.17314134041468301
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,fp8,1023,0.035717333356539406
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,float16,2047,0.09645332892735799
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,fp8,2047,0.059562668204307556
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,fp8,4095,0.09635200103123982
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,8,128,0,1,float16,float16,1,0.029493334392706554
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,8,128,0,1,float16,fp8,1,0.026165333886941273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,8,128,0,1,float16,float16,3,0.0295413335164388
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,8,128,0,1,float16,fp8,7,0.02536533276240031
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,fp8,8191,0.17054933309555054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,8,128,0,1,float16,fp8,3,0.025594666600227356
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,8,128,0,1,float16,float16,31,0.02942399928967158
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,8,128,0,1,float16,float16,8191,0.32896532615025836
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,8,128,0,1,float16,float16,7,0.029205332199732464
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,8,128,0,1,float16,float16,15,0.029285334050655365
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,8,128,0,1,float16,fp8,15,0.026144000391165417
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,8,128,0,1,float16,fp8,31,0.026181332767009735
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,8,128,0,1,float16,fp8,127,0.025493333737055462
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,8,128,0,1,float16,float16,63,0.029482667644818623
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,8,128,0,1,float16,float16,511,0.05374933282534281
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,8,128,0,1,float16,fp8,63,0.025813333690166473
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,8,128,0,1,float16,float16,127,0.029487999776999157
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,8,128,0,1,float16,float16,1023,0.09227733810742696
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,8,128,0,1,float16,float16,255,0.03382933388153712
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,8,128,0,1,float16,fp8,255,0.025413334369659424
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,8,128,0,1,float16,fp8,511,0.03939199944337209
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,8,128,0,1,float16,fp8,1023,0.05804799993832906
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,8,128,0,1,float16,float16,2047,0.17271999518076578
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,8,128,0,1,float16,fp8,2047,0.10094400246938069
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,8,128,0,1,float16,float16,1,0.048986668388048805
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,8,128,0,1,float16,fp8,1,0.04204266766707102
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,8,128,0,1,float16,fp8,3,0.04339733223120371
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,8,128,0,1,float16,float16,3,0.04953599969546
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,8,128,0,1,float16,float16,7,0.04966400067011515
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,8,128,0,1,float16,fp8,7,0.04186133543650309
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,8,128,0,1,float16,float16,15,0.04975466430187225
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,8,128,0,1,float16,fp8,15,0.04188266893227895
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,8,128,0,1,float16,fp8,63,0.04182933270931244
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,8,128,0,1,float16,float16,31,0.04809066653251648
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,8,128,0,1,float16,float16,127,0.04971733192602793
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,8,128,0,1,float16,fp8,31,0.04218666752179464
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,8,128,0,1,float16,fp8,127,0.042170668641726174
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,8,128,0,1,float16,float16,63,0.049679999550183616
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,8,128,0,1,float16,float16,255,0.05340266724427541
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,8,128,0,1,float16,fp8,255,0.04390933116277059
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,float16,1,0.011183999478816986
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,8,128,0,1,float16,float16,511,0.09304533402125041
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,8,128,0,1,float16,fp8,511,0.06596266726652782
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,fp8,1,0.010645333677530289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,float16,3,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,8,128,0,1,float16,float16,1023,0.17091200749079385
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,fp8,3,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,8,128,0,1,float16,fp8,1023,0.10188800096511841
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,float16,7,0.009658666948477427
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,fp8,7,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,float16,15,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,fp8,15,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,fp8,31,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,float16,31,0.009413333609700203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,float16,63,0.009717333440979322
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,fp8,63,0.010602666685978571
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,float16,127,0.009679999823371569
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,fp8,127,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,float16,255,0.010885333021481832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,fp8,255,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,float16,511,0.011413333316644033
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,fp8,511,0.01090666651725769
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,float16,1023,0.012661332885424295
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,fp8,1023,0.012938667088747025
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,float16,2047,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,fp8,2047,0.016549333930015564
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,float16,4095,0.02090666691462199
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,fp8,4095,0.019002666076024372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,float16,8191,0.039066667358080544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,fp8,8191,0.02319466571013133
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,float16,16383,0.05919999877611796
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,fp8,16383,0.039850667119026184
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,float16,32767,0.10059199730555217
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,fp8,32767,0.0609493354956309
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,float16,65535,0.18584533532460532
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,fp8,65535,0.10494400064150493
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,8,128,0,1,float16,float16,1,0.08887466788291931
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,8,128,0,1,float16,float16,3,0.08892266949017842
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,8,128,0,1,float16,fp8,3,0.07442133128643036
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,8,128,0,1,float16,float16,7,0.09058133761088054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,8,128,0,1,float16,fp8,1,0.07744533320267995
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,8,128,0,1,float16,fp8,7,0.07518933216730754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,8,128,0,1,float16,float16,15,0.08874133229255676
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,fp8,131071,0.19420266151428223
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,8,128,0,1,float16,fp8,15,0.07576000193754832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,8,128,0,1,float16,float16,31,0.08893332878748576
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,8,128,0,1,float16,fp8,31,0.07639466722806294
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,8,128,0,1,float16,float16,131071,0.35812799135843915
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,8,128,0,1,float16,fp8,63,0.0749120016892751
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,8,128,0,1,float16,float16,127,0.08879466851552327
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,8,128,0,1,float16,float16,63,0.08970666925112407
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,8,128,0,1,float16,fp8,127,0.07630399862925212
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,8,128,0,1,float16,float16,255,0.09472533067067464
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,8,128,0,1,float16,fp8,255,0.0757066657145818
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,8,128,0,1,float16,float16,511,0.1718719998995463
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,8,128,0,1,float16,fp8,511,0.11750933527946472
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,8,128,0,1,float16,fp8,1,0.1418719987074534
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,8,128,0,1,float16,fp8,3,0.14400532841682434
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,8,128,0,1,float16,float16,7,0.16713066895802817
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,8,128,0,1,float16,fp8,7,0.14243200421333313
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,8,128,0,1,float16,float16,1,0.16700265804926553
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,8,128,0,1,float16,float16,15,0.1668000022570292
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,8,128,0,1,float16,float16,3,0.16770132382710776
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,8,128,0,1,float16,fp8,15,0.14281599720319113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,8,128,0,1,float16,float16,31,0.16668800512949625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,8,128,0,1,float16,fp8,31,0.14389333128929138
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,8,128,0,1,float16,float16,63,0.1671839952468872
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,float16,1,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,fp8,1,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,8,128,0,1,float16,fp8,63,0.1421066621939341
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,float16,3,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,fp8,3,0.010938666760921478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,fp8,7,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,float16,7,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,8,128,0,1,float16,fp8,127,0.14257599910100302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,float16,15,0.010885333021481832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,8,128,0,1,float16,float16,127,0.1659999986489614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,fp8,15,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,float16,63,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,float16,31,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,fp8,63,0.010911999891201654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,8,128,0,1,float16,fp8,255,0.1418453355630239
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,fp8,31,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,float16,127,0.01108266661564509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,fp8,127,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,8,128,0,1,float16,float16,255,0.176746666431427
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,float16,255,0.010656000425418219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,fp8,255,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,float16,511,0.012928000340859095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,fp8,511,0.011066666493813196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,float16,1023,0.013552000125249227
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,fp8,1023,0.012842666357755661
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,float16,2047,0.01932799940307935
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,fp8,2047,0.018917333334684372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,float16,4095,0.03914133210976919
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,fp8,4095,0.02346133440732956
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,float16,8191,0.05794133245944977
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,fp8,8191,0.038693333665529885
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,float16,16383,0.09975467125574748
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,fp8,16383,0.05994133154551188
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,float16,1,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,float16,32767,0.18517865737279257
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,fp8,1,0.010949333508809408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,fp8,32767,0.10292266805966695
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,float16,3,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,fp8,3,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,float16,7,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,fp8,7,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,fp8,65535,0.18625066677729288
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,float16,15,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,8,128,0,1,float16,float16,65535,0.3505013386408488
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,fp8,15,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,float16,31,0.010581333190202713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,fp8,31,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,float16,63,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,fp8,63,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,float16,127,0.010954666882753372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,float16,1023,0.013349333157142004
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,fp8,127,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,float16,255,0.009685333197315535
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,fp8,255,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,float16,511,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,fp8,511,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,fp8,1023,0.012821332861979803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,float16,2047,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,fp8,2047,0.017173333714405697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,float16,4095,0.02330133318901062
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,fp8,4095,0.021290667355060577
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,float16,8191,0.03946666667858759
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,fp8,8191,0.03136533250411352
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,float16,16383,0.06214933097362518
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,fp8,16383,0.05434666574001312
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,float16,1,0.009296000003814697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,fp8,1,0.009072000160813332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,fp8,3,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,float16,3,0.008938666433095932
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,fp8,32767,0.0925546685854594
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,float16,32767,0.10732266306877136
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,float16,7,0.009589333087205887
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,fp8,7,0.008863999818762144
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,float16,15,0.009770666559537252
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,fp8,15,0.008943999807039896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,float16,31,0.008992000172535578
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,float16,65535,0.1970613400141398
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,fp8,31,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,float16,63,0.009690666571259499
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,fp8,63,0.009141333401203156
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,64,0,1,float16,fp8,65535,0.1675893266995748
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,float16,127,0.009093333035707474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,fp8,127,0.009029333169261614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,float16,255,0.009061333412925402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,float16,511,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,fp8,2047,0.010591999938090643
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,float16,4095,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,fp8,255,0.00897066667675972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,fp8,511,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,fp8,8191,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,float16,1023,0.00922133338948091
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,fp8,1023,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,float16,2047,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,fp8,4095,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,float16,8191,0.014885333677132925
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,fp8,32767,0.019173332800467808
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,float16,16383,0.01714666684468587
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,fp8,16383,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,float16,32767,0.01929066702723503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,float16,1,0.009141333401203156
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,float16,65535,0.02102400114138921
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,fp8,65535,0.01912533367673556
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,fp8,1,0.00956266683836778
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,float16,3,0.009008000294367472
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,fp8,3,0.009589333087205887
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,float16,7,0.009125333279371262
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,fp8,7,0.008912000184257826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,float16,15,0.008896000062425932
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,fp8,15,0.009408000235756239
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,float16,31,0.00897066667675972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,float16,131071,0.028624000648657482
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,fp8,31,0.009663999701539675
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,float16,63,0.009712000067035357
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,fp8,63,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,float16,127,0.008922666932145754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,64,0,1,float16,fp8,131071,0.025402667621771496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,fp8,127,0.008986666798591614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,float16,255,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,fp8,255,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,float16,511,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,fp8,511,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,float16,4095,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,float16,1023,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,fp8,1023,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,fp8,8191,0.01543466622630755
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,float16,2047,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,fp8,2047,0.01098666712641716
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,fp8,4095,0.014794666320085526
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,float16,8191,0.015018666783968607
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,float16,16383,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,fp8,16383,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,float16,32767,0.021045332153638203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,fp8,32767,0.01894933357834816
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,float16,1,0.01097600037852923
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,float16,65535,0.026778665681680042
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,fp8,1,0.01268799975514412
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,fp8,65535,0.023408000667889912
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,float16,3,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,fp8,3,0.010970667004585266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,float16,7,0.011370666325092316
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,fp8,7,0.012810666114091873
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,float16,15,0.011695999652147293
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,float16,131071,0.04260266820589701
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,fp8,15,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,float16,31,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,fp8,31,0.012757333616415659
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,float16,63,0.011440000186363855
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,64,0,1,float16,fp8,131071,0.03349866718053818
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,fp8,63,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,float16,127,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,fp8,127,0.011440000186363855
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,float16,255,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,fp8,255,0.012378666549921036
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,float16,511,0.01313599944114685
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,fp8,511,0.01310933381319046
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,float16,1023,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,fp8,1023,0.014943999548753103
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,float16,2047,0.02498133232196172
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,fp8,2047,0.023077333966890972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,float16,4095,0.041509332756201424
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,fp8,4095,0.031317333380381264
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,float16,8191,0.06332266827424367
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,fp8,8191,0.05533333122730255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,float16,1,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,fp8,1,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,float16,16383,0.10736533006032307
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,fp8,16383,0.09379200140635173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,float16,3,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,fp8,3,0.009429333110650381
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,float16,7,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,fp8,7,0.010341333225369453
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,float16,15,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,fp8,15,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,float16,31,0.010586666564146677
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,fp8,31,0.009328000247478485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,fp8,32767,0.1696000099182129
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,float16,63,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,64,0,1,float16,float16,32767,0.19760533173878989
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,fp8,63,0.010058666889866194
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,float16,127,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,fp8,127,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,float16,255,0.008863999818762144
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,float16,2047,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,fp8,255,0.009002666920423508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,fp8,511,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,float16,511,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,float16,8191,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,float16,1023,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,fp8,1023,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,fp8,2047,0.014783999572197596
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,float16,4095,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,fp8,4095,0.016143999993801117
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,fp8,8191,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,fp8,32767,0.02347733328739802
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,float16,16383,0.020031999796628952
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,fp8,16383,0.019066666563351948
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,float16,32767,0.02640533447265625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,float16,65535,0.04155199974775314
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,float16,1,0.013733333597580591
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,fp8,1,0.013050666699806849
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,float16,3,0.013834666460752487
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,fp8,65535,0.03148799886306127
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,fp8,3,0.014149333039919535
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,float16,7,0.013957332819700241
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,fp8,7,0.013455999394257864
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,float16,15,0.01349866638580958
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,fp8,15,0.013221333424250284
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,float16,31,0.013072000195582708
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,fp8,31,0.013850666582584381
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,float16,63,0.013807999591032663
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,float16,131071,0.06403733293215434
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,fp8,63,0.013872000078360239
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,float16,127,0.014501333236694336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,64,0,1,float16,fp8,131071,0.05580266813437144
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,fp8,127,0.013125333935022354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,float16,255,0.013023999830087027
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,fp8,255,0.012826666235923767
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,float16,511,0.01728533332546552
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,fp8,511,0.015882667154073715
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,float16,1023,0.023056000471115112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,fp8,1023,0.021242665747801464
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,float16,2047,0.04155199974775314
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,fp8,2047,0.03324266771475474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,float16,4095,0.06401599943637848
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,fp8,4095,0.05671466886997223
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,float16,8191,0.1085653305053711
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,fp8,8191,0.09497066338857015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,float16,1,0.017279999951521557
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,fp8,1,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,float16,3,0.017349333812793095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,fp8,3,0.017194667210181553
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,float16,7,0.01788266624013583
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,float16,15,0.017024000485738117
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,fp8,7,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,float16,16383,0.19784533977508545
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,fp8,31,0.01718933383623759
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,64,0,1,float16,fp8,16383,0.17059733470280966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,fp8,15,0.017269333203633625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,float16,31,0.017269333203633625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,float16,63,0.017312000195185345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,fp8,63,0.01716800034046173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,float16,127,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,fp8,127,0.016832000265518825
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,float16,255,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,fp8,255,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,float16,511,0.023050665855407715
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,fp8,511,0.021338666478792827
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,float16,1023,0.03568533311287562
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,fp8,1023,0.030095999439557392
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,float16,2047,0.060506666700045265
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,fp8,2047,0.053823997577031456
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,4,64,0,1,float16,float16,1,0.02548266698916753
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,float16,4095,0.09924800197283427
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,64,0,1,float16,fp8,4095,0.08689066767692566
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,4,64,0,1,float16,fp8,1,0.025077333052953083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,4,64,0,1,float16,float16,3,0.026074667771657307
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,4,64,0,1,float16,fp8,3,0.025413334369659424
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,4,64,0,1,float16,float16,7,0.025685332715511322
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,4,64,0,1,float16,fp8,7,0.02536533276240031
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,4,64,0,1,float16,float16,15,0.02555199960867564
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,4,64,0,1,float16,fp8,15,0.025418666501839954
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,4,64,0,1,float16,float16,31,0.025434667865435284
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,4,64,0,1,float16,fp8,31,0.025066666305065155
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,4,64,0,1,float16,float16,63,0.02619733413060506
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,4,64,0,1,float16,fp8,63,0.025221332907676697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,4,64,0,1,float16,float16,127,0.025397333006064098
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,4,64,0,1,float16,fp8,127,0.025146665672461193
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,4,64,0,1,float16,float16,255,0.02624000112215678
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,4,64,0,1,float16,fp8,255,0.02533866713444392
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,4,64,0,1,float16,float16,511,0.03961600114901861
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,4,64,0,1,float16,fp8,511,0.03454400102297465
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,4,64,0,1,float16,float16,1023,0.059818665186564125
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,float16,1,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,4,64,0,1,float16,fp8,1023,0.05395199855168661
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,fp8,1,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,float16,3,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,float16,15,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,4,64,0,1,float16,float16,2047,0.10294399658838908
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,fp8,3,0.009061333412925402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,4,64,0,1,float16,fp8,2047,0.09084799885749817
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,float16,7,0.009482666850090027
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,fp8,7,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,float16,127,0.010890666395425797
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,fp8,15,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,float16,31,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,fp8,31,0.01101333275437355
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,float16,511,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,float16,63,0.008869333192706108
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,fp8,63,0.00927466650803884
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,fp8,127,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,float16,255,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,fp8,2047,0.013088000317414602
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,fp8,255,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,fp8,511,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,float16,1023,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,fp8,1023,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,float16,2047,0.01310933381319046
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,float16,4095,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,fp8,4095,0.014858666807413101
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,float16,8191,0.018906666586796444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,fp8,8191,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,float16,16383,0.02516266703605652
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,fp8,32767,0.03141866624355316
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,fp8,16383,0.020960000654061634
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,float16,32767,0.04012266546487808
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,fp8,65535,0.05376533170541128
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,float16,65535,0.06286933521429698
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,4,64,0,1,float16,float16,1,0.04452266792456309
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,4,64,0,1,float16,fp8,1,0.04030400017897288
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,4,64,0,1,float16,fp8,3,0.0397173340121905
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,4,64,0,1,float16,float16,3,0.043509334325790405
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,4,64,0,1,float16,float16,7,0.043653334180514015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,4,64,0,1,float16,fp8,7,0.040149333576361336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,4,64,0,1,float16,float16,15,0.043509334325790405
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,float16,131071,0.11214933792750041
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,4,64,0,1,float16,fp8,15,0.040933333337306976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,64,0,1,float16,fp8,131071,0.09455466270446777
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,4,64,0,1,float16,float16,31,0.04364266494909922
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,4,64,0,1,float16,fp8,31,0.039690665900707245
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,4,64,0,1,float16,float16,63,0.04381866753101349
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,4,64,0,1,float16,fp8,63,0.03979733337958654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,4,64,0,1,float16,float16,127,0.04378133515516917
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,4,64,0,1,float16,fp8,127,0.039701332648595176
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,4,64,0,1,float16,float16,255,0.04554133117198944
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,4,64,0,1,float16,fp8,255,0.04005333284536997
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,4,64,0,1,float16,float16,511,0.06809600194295247
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,4,64,0,1,float16,fp8,511,0.06101333101590475
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,4,64,0,1,float16,float16,1023,0.10570133725802104
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,4,64,0,1,float16,fp8,1023,0.09463466207186381
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,4,64,0,1,float16,float16,1,0.07770666480064392
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,4,64,0,1,float16,fp8,1,0.07226666808128357
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,4,64,0,1,float16,float16,3,0.07855999966462453
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,4,64,0,1,float16,fp8,3,0.07229333122571309
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,4,64,0,1,float16,float16,7,0.07838400204976399
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,4,64,0,1,float16,fp8,7,0.07229333122571309
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,4,64,0,1,float16,float16,15,0.0784746656815211
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,4,64,0,1,float16,fp8,15,0.0708000014225642
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,4,64,0,1,float16,float16,31,0.07807999849319458
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,4,64,0,1,float16,fp8,31,0.07218666871388753
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,4,64,0,1,float16,float16,63,0.0783786674340566
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,4,64,0,1,float16,fp8,63,0.07212799787521362
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,float16,1,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,fp8,1,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,4,64,0,1,float16,float16,127,0.07931200166543324
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,float16,3,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,fp8,3,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,4,64,0,1,float16,fp8,127,0.07211199899514516
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,float16,7,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,fp8,7,0.010901333143313726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,4,64,0,1,float16,float16,255,0.07840000092983246
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,float16,15,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,4,64,0,1,float16,fp8,255,0.07146133482456207
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,4,64,0,1,float16,float16,511,0.12150933345158894
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,fp8,15,0.0106133334338665
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,float16,31,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,4,64,0,1,float16,fp8,511,0.10939199725786845
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,fp8,31,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,float16,63,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,fp8,63,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,float16,127,0.010634666929642359
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,fp8,127,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,float16,255,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,float16,2047,0.015578666081031164
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,fp8,255,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,float16,511,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,fp8,511,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,float16,1023,0.012400000045696894
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,fp8,1023,0.011168000598748526
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,fp8,2047,0.014864000181357065
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,float16,4095,0.01758933315674464
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,fp8,4095,0.017701332767804463
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,float16,8191,0.025087999800841015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,fp8,8191,0.023050665855407715
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,float16,16383,0.04044266790151596
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,fp8,16383,0.03147733211517334
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,float16,32767,0.06406400104363759
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,fp8,32767,0.05409599840641022
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,fp8,1,0.010992000500361124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,float16,1,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,fp8,3,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,float16,3,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,float16,7,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,float16,65535,0.10903466741243999
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,fp8,65535,0.09205333391825359
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,fp8,7,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,float16,15,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,fp8,15,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,float16,31,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,fp8,31,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,float16,63,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,fp8,63,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,float16,127,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,fp8,127,0.010885333021481832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,float16,255,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,fp8,255,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,float16,511,0.012608000387748083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,fp8,511,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,fp8,131071,0.169322669506073
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,64,0,1,float16,float16,131071,0.20539732774098715
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,float16,1023,0.013861333330472311
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,fp8,1023,0.012752000242471695
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,float16,2047,0.01930133377512296
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,fp8,2047,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,float16,4095,0.03782933453718821
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,fp8,4095,0.023290666441122692
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,float16,8191,0.05989866455396017
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,fp8,8191,0.03945599993069967
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,float16,16383,0.10082667072614034
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,float16,1,0.010181333248813948
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,fp8,16383,0.06000000238418579
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,float16,32767,0.1830880045890808
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,fp8,1,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,float16,3,0.00897066667675972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,fp8,32767,0.10089066624641418
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,fp8,3,0.009685333197315535
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,float16,7,0.009098666409651438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,fp8,15,0.009103999783595404
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,fp8,7,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,float16,15,0.009002666920423508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,float16,31,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,fp8,65535,0.187226672967275
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,fp8,31,0.009829333052039146
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,4,128,0,1,float16,float16,65535,0.3489706516265869
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,float16,63,0.008943999807039896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,fp8,63,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,float16,127,0.009082666908701261
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,float16,511,0.010890666395425797
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,fp8,127,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,float16,255,0.009610666582981745
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,fp8,255,0.008858666444818178
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,fp8,511,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,float16,1023,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,fp8,1023,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,float16,2047,0.010992000500361124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,fp8,2047,0.010885333021481832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,float16,4095,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,fp8,4095,0.011066666493813196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,float16,8191,0.0161920003592968
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,fp8,8191,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,float16,16383,0.018837332725524902
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,fp8,16383,0.017898666361967724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,float16,32767,0.021274665991465252
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,fp8,32767,0.019296000401178997
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,float16,65535,0.024218666056791942
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,float16,1,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,fp8,1,0.009872000043590864
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,float16,3,0.009962666779756546
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,fp8,3,0.009829333052039146
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,fp8,65535,0.021189334491888683
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,float16,7,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,fp8,7,0.009039999917149544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,float16,15,0.00973866693675518
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,float16,131071,0.04379733403523763
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,fp8,15,0.009717333440979322
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,float16,31,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,fp8,31,0.010415999839703241
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,float16,63,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,fp8,255,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,fp8,63,0.009050666665037474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,float16,127,0.009882666791478792
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,4,128,0,1,float16,fp8,131071,0.02749866743882497
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,fp8,127,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,float16,255,0.009877333417534828
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,float16,511,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,fp8,511,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,float16,1023,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,fp8,1023,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,float16,2047,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,fp8,2047,0.010992000500361124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,float16,4095,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,fp8,16383,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,fp8,4095,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,float16,32767,0.024906667570273083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,float16,8191,0.017045332739750545
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,fp8,8191,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,float16,16383,0.019765333582957584
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,fp8,32767,0.020549333343903225
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,float16,1,0.012698666503032049
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,fp8,1,0.011173332730929056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,float16,65535,0.04020266731580099
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,float16,3,0.011866666376590729
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,fp8,65535,0.025407999753952026
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,fp8,3,0.010949333508809408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,float16,7,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,fp8,7,0.010965333630641302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,float16,15,0.011146667102972666
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,fp8,15,0.011125333607196808
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,float16,131071,0.062047998110453285
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,float16,31,0.012741333494583765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,fp8,31,0.011381333072980246
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,4,128,0,1,float16,fp8,131071,0.04284266630808512
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,float16,63,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,fp8,63,0.011087999989589056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,float16,127,0.012762666990359625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,fp8,127,0.011077333241701126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,float16,255,0.011413333316644033
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,fp8,255,0.01180800050497055
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,float16,511,0.014858666807413101
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,fp8,511,0.013381333400805792
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,float16,1023,0.017093333105246227
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,fp8,1023,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,float16,2047,0.039408000806967415
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,fp8,2047,0.024469333390394848
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,float16,4095,0.05997333427270254
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,fp8,4095,0.03994666785001755
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,float16,8191,0.10178132851918538
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,fp8,8191,0.06195733447869619
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,float16,1,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,float16,3,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,float16,16383,0.1848586599032084
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,fp8,16383,0.1029919981956482
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,fp8,1,0.01091733326514562
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,fp8,3,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,float16,7,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,fp8,7,0.009701333319147428
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,fp8,32767,0.18929600715637207
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,float16,15,0.009066666786869368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,fp8,15,0.011061333119869232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,fp8,63,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,float16,31,0.010890666395425797
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,4,128,0,1,float16,float16,32767,0.35172800223032635
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,fp8,255,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,fp8,31,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,float16,63,0.009306666751702627
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,float16,127,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,fp8,127,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,float16,255,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,fp8,511,0.010922666639089584
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,fp8,2047,0.014858666807413101
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,float16,511,0.011183999478816986
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,float16,1023,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,fp8,1023,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,float16,2047,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,float16,4095,0.017263999829689663
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,fp8,4095,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,float16,8191,0.01905599981546402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,fp8,8191,0.017279999951521557
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,float16,16383,0.02310933421055476
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,fp8,16383,0.021173333128293354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,float16,32767,0.04160533348719279
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,fp8,32767,0.025792000194390614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,float16,65535,0.05975466469923655
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,float16,1,0.014767999450365702
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,fp8,1,0.014570667097965876
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,float16,3,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,fp8,3,0.014864000181357065
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,float16,7,0.014912000546852747
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,fp8,7,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,float16,31,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,float16,15,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,fp8,65535,0.041189332803090416
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,fp8,15,0.01481066644191742
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,fp8,31,0.013210666676362356
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,float16,63,0.01481066644191742
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,fp8,63,0.014805333067973455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,float16,127,0.014767999450365702
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,fp8,127,0.012975999464591345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,float16,255,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,float16,131071,0.10075199604034424
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,fp8,255,0.013104000439246496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,4,128,0,1,float16,fp8,131071,0.06216000020503998
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,float16,511,0.01921066641807556
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,fp8,511,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,float16,1023,0.03433600068092346
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,fp8,1023,0.021312000850836437
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,float16,2047,0.05994133154551188
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,fp8,2047,0.03949866692225138
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,float16,4095,0.10190932949384053
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,fp8,4095,0.06284266710281372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,float16,8191,0.18544532855351767
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,float16,1,0.018922666708628338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,fp8,8191,0.10517866412798564
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,fp8,3,0.018250666558742523
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,fp8,1,0.017386666188637417
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,float16,3,0.01899733394384384
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,float16,7,0.019189332922299702
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,fp8,7,0.017082666357358296
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,float16,15,0.018976000448067982
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,fp8,15,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,float16,16383,0.3543999989827474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,4,128,0,1,float16,fp8,16383,0.18930133183797201
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,float16,31,0.01897066707412402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,fp8,31,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,float16,63,0.018944000204404194
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,fp8,63,0.017103999853134155
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,float16,127,0.018917333334684372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,fp8,127,0.017130666722853977
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,float16,255,0.01894933357834816
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,fp8,255,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,float16,511,0.033189333975315094
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,float16,2047,0.09608532985051473
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,fp8,511,0.023333333432674408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,float16,1023,0.05203733344872793
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,fp8,1023,0.0352906659245491
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,fp8,2047,0.05978133281071981
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,float16,4095,0.17231466372807822
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,4,128,0,1,float16,float16,1,0.02918400118748347
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,4,128,0,1,float16,fp8,4095,0.09680533409118652
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,4,128,0,1,float16,float16,7,0.029365333418051403
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,4,128,0,1,float16,fp8,1,0.025413334369659424
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,4,128,0,1,float16,float16,3,0.029418667157491047
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,4,128,0,1,float16,fp8,3,0.02516799916823705
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,4,128,0,1,float16,fp8,7,0.026538667579491932
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,4,128,0,1,float16,float16,15,0.029232000311215717
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,4,128,0,1,float16,fp8,15,0.02508266766866048
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,4,128,0,1,float16,float16,31,0.029317334294319153
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,4,128,0,1,float16,fp8,31,0.025301332275072735
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,4,128,0,1,float16,float16,63,0.029391999046007793
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,4,128,0,1,float16,fp8,63,0.0271573339899381
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,4,128,0,1,float16,float16,127,0.02942399928967158
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,4,128,0,1,float16,fp8,127,0.025450666745503742
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,4,128,0,1,float16,float16,255,0.0328053335348765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,4,128,0,1,float16,fp8,255,0.025370667378107708
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,4,128,0,1,float16,float16,511,0.05386666456858317
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,4,128,0,1,float16,fp8,511,0.03951466580231985
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,4,128,0,1,float16,float16,1023,0.09225066502888997
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,float16,1,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,4,128,0,1,float16,fp8,1023,0.057775999108950295
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,fp8,1,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,float16,3,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,4,128,0,1,float16,float16,2047,0.17467733224232992
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,fp8,3,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,4,128,0,1,float16,fp8,2047,0.10090667009353638
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,float16,7,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,fp8,7,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,float16,15,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,fp8,15,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,float16,31,0.00901333304742972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,fp8,31,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,float16,63,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,fp8,63,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,float16,127,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,fp8,127,0.01098666712641716
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,float16,255,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,fp8,1023,0.01090666651725769
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,fp8,255,0.009093333035707474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,float16,511,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,fp8,511,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,float16,1023,0.011125333607196808
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,float16,2047,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,fp8,2047,0.013797332843144735
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,float16,4095,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,fp8,4095,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,float16,8191,0.020981334149837494
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,fp8,8191,0.019050666441520054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,float16,16383,0.03890133400758108
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,fp8,32767,0.039701332648595176
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,float16,32767,0.058965335289637245
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,fp8,16383,0.02479466547568639
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,float16,65535,0.10094400246938069
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,4,128,0,1,float16,float16,1,0.049584001302719116
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,4,128,0,1,float16,fp8,1,0.041663999358812966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,fp8,65535,0.06155733267466227
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,4,128,0,1,float16,float16,3,0.04805333415667216
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,4,128,0,1,float16,fp8,3,0.04191466669241587
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,4,128,0,1,float16,float16,7,0.04902400076389313
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,4,128,0,1,float16,fp8,7,0.04181333382924398
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,4,128,0,1,float16,float16,15,0.047930667797724404
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,4,128,0,1,float16,fp8,15,0.041536000867684685
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,4,128,0,1,float16,float16,31,0.047968000173568726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,fp8,131071,0.10714667042096455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,4,128,0,1,float16,fp8,31,0.04207466542720795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,4,128,0,1,float16,float16,63,0.04891733328501383
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,4,128,0,1,float16,float16,131071,0.18686934312184653
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,4,128,0,1,float16,float16,127,0.04960533479849497
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,4,128,0,1,float16,fp8,63,0.041797334949175514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,4,128,0,1,float16,fp8,127,0.04163199911514918
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,4,128,0,1,float16,float16,255,0.05381333331267039
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,4,128,0,1,float16,fp8,255,0.043477331598599754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,4,128,0,1,float16,float16,511,0.09523199995358785
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,4,128,0,1,float16,fp8,511,0.06403733293215434
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,4,128,0,1,float16,float16,1023,0.17021334171295166
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,4,128,0,1,float16,fp8,1023,0.10100799798965454
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,4,128,0,1,float16,float16,1,0.0888320008913676
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,4,128,0,1,float16,fp8,1,0.07458133498827617
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,4,128,0,1,float16,float16,3,0.08943466345469157
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,4,128,0,1,float16,fp8,3,0.07629333436489105
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,4,128,0,1,float16,float16,7,0.08870933453241985
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,4,128,0,1,float16,fp8,7,0.07472000022729237
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,4,128,0,1,float16,float16,15,0.0902880032857259
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,4,128,0,1,float16,fp8,15,0.07454933226108551
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,4,128,0,1,float16,float16,31,0.088837335507075
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,4,128,0,1,float16,fp8,31,0.07458666463692983
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,4,128,0,1,float16,float16,63,0.0888853371143341
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,4,128,0,1,float16,fp8,63,0.07633600135644276
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,float16,1,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,4,128,0,1,float16,float16,127,0.08867200215657552
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,fp8,1,0.009808000177145004
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,4,128,0,1,float16,fp8,127,0.07698133091131847
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,float16,3,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,fp8,3,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,float16,7,0.01098666712641716
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,float16,15,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,fp8,7,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,4,128,0,1,float16,float16,255,0.09462400277455647
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,4,128,0,1,float16,fp8,255,0.07688533266385396
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,fp8,15,0.010885333021481832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,float16,31,0.009114666531483332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,4,128,0,1,float16,float16,511,0.1749653418858846
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,fp8,31,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,4,128,0,1,float16,fp8,511,0.11757866541544597
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,float16,63,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,fp8,63,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,float16,127,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,fp8,127,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,float16,255,0.009045333291093508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,fp8,255,0.010634666929642359
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,fp8,2047,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,float16,511,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,fp8,511,0.01121066634853681
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,float16,1023,0.012954667210578918
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,fp8,1023,0.011941333611806234
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,float16,2047,0.017194667210181553
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,float16,4095,0.02093333254257838
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,fp8,4095,0.019141333798567455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,float16,8191,0.03937600056330363
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,fp8,8191,0.02514133354028066
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,float16,16383,0.059114664793014526
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,fp8,16383,0.0394400010506312
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,float16,32767,0.10122133294741313
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,fp8,32767,0.06188266475995382
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,float16,1,0.009717333440979322
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,fp8,1,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,float16,3,0.00903466654320558
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,fp8,3,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,float16,7,0.009039999917149544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,fp8,7,0.009818666925032934
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,float16,65535,0.18453333775202432
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,fp8,65535,0.10532800356547038
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,float16,15,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,fp8,15,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,float16,31,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,fp8,31,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,float16,127,0.009008000294367472
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,float16,63,0.010106666634480158
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,fp8,63,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,fp8,127,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,float16,255,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,fp8,511,0.011018666128317514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,fp8,255,0.009653333574533463
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,float16,511,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,float16,1023,0.012863999853531519
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,fp8,1023,0.01098666712641716
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,float16,2047,0.01685333376129468
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,fp8,131071,0.19338667392730713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,4,128,0,1,float16,float16,131071,0.35309867064158124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,fp8,2047,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,float16,4095,0.018810667097568512
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,fp8,4095,0.0176959993938605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,float16,8191,0.023749334116776783
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,fp8,8191,0.023045333723227184
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,float16,16383,0.041120000183582306
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,fp8,16383,0.031498665610949196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,float16,32767,0.06428266565004985
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,float16,1,0.008933333059151968
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,fp8,32767,0.053914666175842285
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,fp8,1,0.009503999724984169
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,float16,3,0.009375999992092451
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,fp8,3,0.010384000216921171
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,float16,7,0.009039999917149544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,fp8,7,0.010618666807810465
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,float16,15,0.008805333326260248
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,float16,65535,0.11130666732788086
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,fp8,15,0.009493333597977957
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,float16,31,0.008943999807039896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,fp8,31,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,float16,63,0.00877333308259646
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,fp8,63,0.009882666791478792
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,fp8,65535,0.09128533800443013
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,float16,127,0.009008000294367472
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,fp8,127,0.009066666786869368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,float16,255,0.00903466654320558
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,fp8,255,0.009061333412925402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,float16,511,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,fp8,511,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,float16,1023,0.008933333059151968
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,fp8,1023,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,float16,2047,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,fp8,2047,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,float16,4095,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,fp8,4095,0.011018666128317514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,float16,16383,0.019120000302791595
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,float16,8191,0.014767999450365702
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,fp8,8191,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,float16,131071,0.2060693303743998
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,fp8,16383,0.019285333653291065
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,2,64,0,1,float16,fp8,131071,0.16914665699005127
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,float16,32767,0.021946666141351063
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,fp8,32767,0.021359999974568684
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,float16,65535,0.024874667326609295
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,float16,1,0.008992000172535578
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,fp8,1,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,float16,3,0.009461333354314169
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,fp8,65535,0.021957332889238994
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,fp8,3,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,fp8,7,0.00984533317387104
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,float16,7,0.00961599995692571
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,float16,15,0.00891733355820179
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,fp8,15,0.009072000160813332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,float16,31,0.008746666833758354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,fp8,31,0.010656000425418219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,float16,131071,0.028255999088287354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,float16,63,0.008874666566650072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,2,64,0,1,float16,fp8,131071,0.025487999121348064
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,fp8,63,0.010112000008424124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,float16,127,0.009077333534757296
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,fp8,127,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,float16,255,0.009765333185593287
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,fp8,255,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,float16,511,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,float16,4095,0.011146667102972666
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,fp8,511,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,fp8,4095,0.011141333729028702
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,float16,1023,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,fp8,1023,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,float16,2047,0.010928000013033548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,fp8,2047,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,float16,8191,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,fp8,8191,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,float16,16383,0.018874666343132656
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,fp8,16383,0.01749333366751671
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,float16,32767,0.019237333287795384
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,fp8,32767,0.018906666586796444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,float16,1,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,float16,65535,0.021344001094500225
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,fp8,1,0.011077333241701126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,float16,3,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,fp8,65535,0.020954666038354237
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,fp8,3,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,float16,7,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,fp8,7,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,float16,15,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,fp8,15,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,float16,31,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,float16,131071,0.029178666571776073
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,fp8,31,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,2,64,0,1,float16,fp8,131071,0.02756800005833308
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,float16,255,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,float16,63,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,fp8,63,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,float16,511,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,float16,127,0.010330666477481524
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,float16,1023,0.012773333738247553
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,fp8,127,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,fp8,2047,0.017082666357358296
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,fp8,255,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,fp8,511,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,fp8,1023,0.013056000073750814
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,float16,2047,0.01709866647919019
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,float16,4095,0.02311466634273529
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,fp8,4095,0.021040000021457672
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,float16,8191,0.03966933240493139
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,fp8,8191,0.03148266673088074
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,float16,16383,0.06230400005976359
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,fp8,16383,0.05565866827964783
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,float16,1,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,fp8,1,0.010602666685978571
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,float16,3,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,fp8,3,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,float16,7,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,fp8,7,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,float16,15,0.008949333180983862
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,float16,32767,0.10620799660682678
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,fp8,15,0.009775999933481216
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,fp8,32767,0.09272533655166626
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,float16,31,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,fp8,31,0.010928000013033548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,float16,63,0.009039999917149544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,fp8,63,0.010058666889866194
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,float16,127,0.009061333412925402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,fp8,127,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,float16,255,0.008938666433095932
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,fp8,255,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,float16,511,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,fp8,511,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,float16,1023,0.009818666925032934
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,fp8,1023,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,fp8,4095,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,float16,65535,0.19712533553441366
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,float16,2047,0.011994666109482447
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,fp8,2047,0.012730666746695837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,float16,4095,0.015008000036080679
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,2,64,0,1,float16,fp8,65535,0.1686720053354899
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,float16,8191,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,fp8,8191,0.016778666526079178
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,float16,16383,0.019280000279347103
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,fp8,16383,0.018272000054518383
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,float16,32767,0.021312000850836437
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,fp8,32767,0.02025066688656807
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,float16,1,0.012250666817029318
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,fp8,1,0.011685332904259363
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,float16,65535,0.027434666951497395
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,float16,3,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,fp8,65535,0.02514133354028066
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,fp8,3,0.010949333508809408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,float16,7,0.011823999385039011
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,fp8,7,0.011482667177915573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,float16,15,0.011098666737476984
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,fp8,15,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,float16,31,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,fp8,31,0.010992000500361124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,float16,63,0.011786667009194693
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,fp8,63,0.011770666887362799
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,fp8,131071,0.035349334279696144
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,float16,127,0.011114666859308878
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,2,64,0,1,float16,float16,131071,0.045882667104403176
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,fp8,127,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,float16,255,0.011109333485364914
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,fp8,255,0.011706666400035223
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,float16,511,0.013178666432698568
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,fp8,2047,0.023215999205907185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,fp8,511,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,float16,1023,0.017125333348910015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,fp8,1023,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,float16,2047,0.02330133318901062
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,float16,4095,0.041365332901477814
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,fp8,4095,0.031632001201311745
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,float16,8191,0.06242666641871134
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,fp8,8191,0.055733333031336464
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,float16,1,0.013173333058754602
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,fp8,1,0.013834666460752487
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,float16,16383,0.10900800426801045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,float16,3,0.01312000056107839
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,fp8,3,0.012853333105643591
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,fp8,16383,0.09478400150934856
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,float16,7,0.013898666948080063
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,fp8,7,0.01310933381319046
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,float16,15,0.01320533330241839
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,fp8,15,0.013189333180586496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,float16,31,0.014778666198253632
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,fp8,31,0.013408000270525614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,float16,63,0.013013333082199097
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,float16,127,0.01331199953953425
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,fp8,63,0.012906666845083237
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,fp8,127,0.013061333447694778
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,float16,255,0.013642666240533194
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,fp8,255,0.013007999708255133
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,float16,511,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,float16,32767,0.19774399201075235
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,2,64,0,1,float16,fp8,32767,0.17044800519943237
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,fp8,511,0.015520000209410986
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,float16,2047,0.041637333730856575
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,float16,1023,0.023050665855407715
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,fp8,1023,0.021322667598724365
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,fp8,2047,0.03335466732581457
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,float16,1,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,fp8,4095,0.057114665706952415
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,float16,4095,0.0639466643333435
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,fp8,1,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,float16,3,0.019013332823912304
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,fp8,3,0.01725333308180173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,float16,8191,0.1085599958896637
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,float16,7,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,2,64,0,1,float16,fp8,8191,0.09505599737167358
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,fp8,7,0.017231999586025875
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,float16,15,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,fp8,15,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,float16,31,0.018613333503405254
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,fp8,31,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,float16,63,0.01725333308180173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,float16,255,0.01785600061217944
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,fp8,63,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,float16,127,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,fp8,127,0.017162666966517765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,fp8,255,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,float16,511,0.0236160010099411
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,fp8,511,0.021903999149799347
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,float16,1023,0.0355679988861084
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,fp8,1023,0.031328000128269196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,float16,1,0.010901333143313726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,float16,3,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,float16,2047,0.06019733349482218
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,fp8,1,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,fp8,2047,0.05369600156943003
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,fp8,4095,0.08691733082135518
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,fp8,15,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,fp8,3,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,float16,31,0.010938666760921478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,float16,7,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,fp8,7,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,2,64,0,1,float16,float16,4095,0.0995306670665741
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,float16,15,0.00898133342464765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,fp8,31,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,float16,63,0.008821333448092142
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,fp8,63,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,fp8,511,0.010938666760921478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,float16,127,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,fp8,127,0.010911999891201654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,float16,255,0.010645333677530289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,fp8,255,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,float16,4095,0.016837333639462788
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,float16,511,0.011066666493813196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,float16,1023,0.010474666953086853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,fp8,1023,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,float16,2047,0.013797332843144735
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,fp8,2047,0.014565333724021912
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,fp8,4095,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,float16,8191,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,fp8,8191,0.017194667210181553
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,float16,16383,0.021322667598724365
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,fp8,16383,0.019029332945744198
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,float16,32767,0.02548266698916753
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,fp8,32767,0.02330133318901062
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,float16,65535,0.04144533226887385
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,fp8,65535,0.03156800071398417
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,2,64,0,1,float16,float16,1,0.027061333258946735
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,2,64,0,1,float16,fp8,1,0.025231999655564625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,2,64,0,1,float16,float16,3,0.027141332626342773
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,2,64,0,1,float16,fp8,3,0.025248001019159954
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,2,64,0,1,float16,float16,7,0.02550933261712392
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,2,64,0,1,float16,fp8,7,0.02526933451493581
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,2,64,0,1,float16,float16,15,0.027087998886903126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,2,64,0,1,float16,fp8,15,0.02510933329661687
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,2,64,0,1,float16,float16,31,0.02720533311367035
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,float16,131071,0.06406933565934499
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,2,64,0,1,float16,fp8,131071,0.05407999952634176
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,2,64,0,1,float16,fp8,31,0.02513066679239273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,2,64,0,1,float16,float16,63,0.025839999318122864
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,2,64,0,1,float16,fp8,63,0.02508266766866048
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,2,64,0,1,float16,fp8,127,0.025199999411900837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,2,64,0,1,float16,float16,255,0.025424001117547352
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,2,64,0,1,float16,fp8,255,0.025248001019159954
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,2,64,0,1,float16,float16,511,0.039834665755430855
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,2,64,0,1,float16,fp8,511,0.03357866654793421
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,2,64,0,1,float16,float16,1023,0.05991466840108236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,2,64,0,1,float16,fp8,1023,0.05340266724427541
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,2,64,0,1,float16,float16,2047,0.10293333729108174
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,2,64,0,1,float16,float16,1,0.043552001317342125
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,2,64,0,1,float16,fp8,1,0.04012800008058548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,2,64,0,1,float16,fp8,2047,0.09063999851544698
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,2,64,0,1,float16,float16,3,0.043578664461771645
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,2,64,0,1,float16,fp8,3,0.039877332746982574
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,2,64,0,1,float16,float16,7,0.04357333481311798
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,2,64,0,1,float16,fp8,7,0.03980266551176707
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,2,64,0,1,float16,float16,15,0.043552001317342125
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,2,64,0,1,float16,fp8,15,0.04008000095685323
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,2,64,0,1,float16,float16,31,0.04358399907747904
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,2,64,0,1,float16,fp8,31,0.03979733337958654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,2,64,0,1,float16,float16,63,0.04370133578777313
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,2,64,0,1,float16,fp8,63,0.03979199876387914
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,2,64,0,1,float16,float16,127,0.043562665581703186
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,2,64,0,1,float16,fp8,127,0.03974399964014689
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,2,64,0,1,float16,float16,255,0.045610666275024414
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,2,64,0,1,float16,fp8,255,0.03932266682386398
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,float16,1,0.009029333169261614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,2,64,0,1,float16,float16,127,0.02720533311367035
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,fp8,1,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,float16,3,0.009050666665037474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,fp8,3,0.011120000233252844
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,2,64,0,1,float16,float16,511,0.06647466619809468
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,float16,7,0.010949333508809408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,fp8,7,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,float16,15,0.00878399983048439
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,fp8,15,0.010992000500361124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,2,64,0,1,float16,fp8,511,0.059903999169667564
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,float16,31,0.009050666665037474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,fp8,31,0.009573333586255709
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,float16,63,0.009306666751702627
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,fp8,63,0.00922133338948091
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,2,64,0,1,float16,float16,1023,0.1072106659412384
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,float16,511,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,float16,127,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,2,64,0,1,float16,fp8,1023,0.09475200374921162
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,float16,1023,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,fp8,127,0.008986666798591614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,float16,255,0.008874666566650072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,fp8,255,0.009002666920423508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,fp8,4095,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,float16,4095,0.01525866612792015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,fp8,511,0.010901333143313726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,fp8,1023,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,float16,2047,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,fp8,2047,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,float16,8191,0.017893332988023758
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,fp8,8191,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,float16,16383,0.025093334416548412
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,fp8,16383,0.023071999351183575
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,float16,32767,0.040565334260463715
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,fp8,32767,0.03130666663249334
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,float16,1,0.010656000425418219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,fp8,1,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,float16,3,0.01062400018175443
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,fp8,3,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,float16,7,0.01098666712641716
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,fp8,7,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,fp8,15,0.01097600037852923
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,float16,15,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,float16,65535,0.06402666866779327
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,fp8,65535,0.054005334774653115
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,float16,31,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,fp8,31,0.010885333021481832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,float16,63,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,fp8,63,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,float16,127,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,fp8,131071,0.09387200077374776
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,fp8,127,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,float16,255,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,fp8,255,0.0103946669648091
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,float16,511,0.011247999966144562
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,2,64,0,1,float16,float16,131071,0.11161067088445027
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,fp8,511,0.01110400011142095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,float16,1023,0.012842666357755661
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,fp8,1023,0.010944000134865442
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,float16,2047,0.01703466723362605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,fp8,2047,0.015322666615247726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,float16,4095,0.021061333517233532
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,fp8,4095,0.019002666076024372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,float16,8191,0.039594667653242745
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,fp8,8191,0.025258667767047882
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,float16,16383,0.05987200140953064
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,fp8,16383,0.03968533376852671
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,float16,32767,0.10147733489672343
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,fp8,32767,0.0620959997177124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,float16,1,0.009093333035707474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,fp8,1,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,float16,3,0.009183999771873156
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,fp8,3,0.009039999917149544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,float16,7,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,fp8,7,0.010410666465759277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,float16,15,0.009893333539366722
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,fp8,15,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,float16,65535,0.18658665815989176
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,fp8,65535,0.10501866539319356
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,float16,31,0.009359999870260557
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,fp8,31,0.010362666721145311
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,fp8,63,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,float16,127,0.009183999771873156
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,float16,63,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,fp8,127,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,float16,255,0.009402666861812273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,fp8,255,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,float16,1023,0.010981333752473196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,float16,511,0.010911999891201654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,float16,2047,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,fp8,511,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,fp8,1023,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,fp8,2047,0.01091733326514562
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,float16,4095,0.011440000186363855
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,fp8,4095,0.011221333096424738
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,float16,131071,0.35496532917022705
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,float16,16383,0.01922133316596349
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,float16,8191,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,2,128,0,1,float16,fp8,131071,0.19530133406321207
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,fp8,8191,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,fp8,16383,0.020970667401949566
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,float16,32767,0.023152001202106476
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,fp8,32767,0.021882665654023487
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,float16,1,0.010949333508809408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,fp8,1,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,float16,3,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,fp8,3,0.00960533320903778
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,fp8,65535,0.023082666099071503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,float16,65535,0.02613866577545802
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,float16,7,0.010234666367371878
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,fp8,7,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,float16,31,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,float16,131071,0.030202666918436687
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,float16,15,0.009632000078757605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,fp8,15,0.008986666798591614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,fp8,31,0.00922133338948091
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,float16,63,0.010629333555698395
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,fp8,63,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,float16,127,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,fp8,127,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,float16,255,0.010191999996701876
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,2,128,0,1,float16,fp8,131071,0.02752000093460083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,float16,2047,0.011968000481526056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,fp8,255,0.008714666590094566
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,float16,511,0.010922666639089584
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,fp8,511,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,float16,1023,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,fp8,1023,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,fp8,2047,0.011258666714032492
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,float16,4095,0.012842666357755661
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,fp8,4095,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,float16,8191,0.016261332978804905
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,fp8,8191,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,float16,16383,0.019253333409627277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,fp8,16383,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,float16,32767,0.021269333859284718
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,float16,1,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,fp8,32767,0.020303999384244282
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,fp8,65535,0.021744000415007275
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,fp8,1,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,float16,65535,0.023413332800070446
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,float16,3,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,fp8,3,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,float16,7,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,fp8,7,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,float16,15,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,float16,131071,0.043653334180514015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,float16,63,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,fp8,15,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,float16,31,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,fp8,31,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,fp8,63,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,2,128,0,1,float16,fp8,131071,0.029178666571776073
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,float16,127,0.010629333555698395
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,fp8,127,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,float16,255,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,fp8,255,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,float16,511,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,fp8,511,0.011173332730929056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,float16,1023,0.013093333691358566
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,fp8,1023,0.012719999998807907
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,float16,2047,0.021055998901526134
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,fp8,2047,0.01788266624013583
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,float16,4095,0.039461334546407066
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,fp8,4095,0.02306666721900304
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,float16,16383,0.10075733065605164
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,float16,8191,0.05977066854635874
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,fp8,8191,0.03941866755485535
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,fp8,16383,0.059978668888409935
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,float16,1,0.010634666929642359
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,fp8,1,0.009573333586255709
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,fp8,3,0.0100853331387043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,float16,3,0.010341333225369453
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,float16,7,0.00973866693675518
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,fp8,7,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,float16,32767,0.1848479906717936
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,fp8,32767,0.10179733236630757
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,float16,15,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,fp8,15,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,float16,31,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,fp8,31,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,float16,63,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,fp8,63,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,float16,127,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,fp8,127,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,float16,255,0.010970667004585266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,fp8,255,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,float16,511,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,fp8,511,0.011018666128317514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,float16,65535,0.3513813416163127
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,fp8,2047,0.012949333836634954
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,2,128,0,1,float16,fp8,65535,0.18700265884399414
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,float16,1023,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,fp8,4095,0.015829333414634068
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,fp8,1023,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,float16,2047,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,float16,4095,0.01682666689157486
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,fp8,16383,0.01882133384545644
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,float16,8191,0.018954666952292126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,fp8,8191,0.017642666896184284
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,float16,16383,0.020293333878119785
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,float16,32767,0.025407999753952026
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,fp8,32767,0.021397332350413006
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,float16,1,0.012842666357755661
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,fp8,1,0.012128000458081564
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,float16,3,0.012837332983811697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,fp8,65535,0.02640533447265625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,fp8,3,0.01121066634853681
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,float16,65535,0.043525333205858864
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,float16,7,0.011541333049535751
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,fp8,7,0.012213333199421564
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,float16,15,0.012821332861979803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,float16,131071,0.06596800188223521
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,fp8,15,0.01110400011142095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,float16,31,0.012890666723251343
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,fp8,31,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,float16,63,0.01146666705608368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,2,128,0,1,float16,fp8,131071,0.045381332437197365
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,fp8,63,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,float16,127,0.012869333227475485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,fp8,127,0.011114666859308878
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,float16,255,0.011157333850860596
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,fp8,255,0.011087999989589056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,float16,511,0.013749333719412485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,fp8,511,0.012800000607967377
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,float16,1023,0.018906666586796444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,float16,4095,0.060506666700045265
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,fp8,1023,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,float16,2047,0.039450667798519135
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,fp8,2047,0.02437866727511088
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,fp8,4095,0.04053866614898046
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,float16,8191,0.10223467151323955
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,fp8,8191,0.0618399977684021
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,float16,1,0.014938666174809137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,fp8,1,0.01303999995191892
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,float16,16383,0.1855199933052063
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,float16,3,0.014970666418472925
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,fp8,16383,0.10496532917022705
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,float16,7,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,fp8,3,0.013839999834696451
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,fp8,15,0.012944000462690989
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,float16,15,0.014826666563749313
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,float16,31,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,fp8,7,0.013173333058754602
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,fp8,31,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,float16,63,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,fp8,63,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,float16,255,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,float16,127,0.014938666174809137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,float16,511,0.018933333456516266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,fp8,127,0.014384000251690546
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,fp8,32767,0.18919465939203897
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,fp8,255,0.014767999450365702
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,fp8,511,0.01701333373785019
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,2,128,0,1,float16,float16,32767,0.353216012318929
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,float16,1023,0.034741332133611046
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,fp8,1023,0.022005334496498108
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,float16,2047,0.06097066899140676
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,fp8,2047,0.04173333446184794
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,float16,4095,0.10351999600728352
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,fp8,4095,0.06227200229962667
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,float16,1,0.019189332922299702
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,fp8,1,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,float16,3,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,fp8,3,0.01793066660563151
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,float16,8191,0.18678933382034302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,2,128,0,1,float16,fp8,8191,0.10628267129262288
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,float16,7,0.01932799940307935
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,fp8,7,0.017162666966517765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,float16,15,0.019253333409627277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,fp8,15,0.017312000195185345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,float16,31,0.01923199991385142
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,fp8,31,0.018917333334684372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,float16,63,0.019285333653291065
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,fp8,63,0.018922666708628338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,float16,127,0.01903466631968816
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,fp8,127,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,float16,255,0.01893866683046023
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,fp8,255,0.017125333348910015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,float16,511,0.03356266766786575
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,fp8,511,0.023168000082174938
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,float16,1023,0.053818667928377785
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,fp8,1023,0.03532266616821289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,float16,1,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,float16,2047,0.09810133775075276
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,fp8,1,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,fp8,2047,0.05981333553791046
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,float16,3,0.01089599976936976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,fp8,3,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,float16,7,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,fp8,7,0.010650667051474253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,fp8,4095,0.09890666604042053
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,float16,15,0.009690666571259499
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,2,128,0,1,float16,float16,4095,0.17466666301091513
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,fp8,15,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,float16,31,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,fp8,31,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,float16,63,0.009674666449427605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,fp8,63,0.010581333190202713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,float16,127,0.010901333143313726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,fp8,127,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,float16,255,0.009109333157539368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,fp8,255,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,float16,511,0.01109333336353302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,fp8,2047,0.015013333410024643
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,fp8,511,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,float16,1023,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,fp8,1023,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,float16,2047,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,float16,4095,0.017162666966517765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,fp8,4095,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,float16,8191,0.01904533306757609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,fp8,8191,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,float16,16383,0.0234400009115537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,fp8,16383,0.021066665649414062
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,float16,32767,0.041663999358812966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,fp8,32767,0.025381334125995636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,float16,65535,0.05995733539263407
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,fp8,65535,0.04164266586303711
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,2,128,0,1,float16,float16,1,0.029557332396507263
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,2,128,0,1,float16,fp8,1,0.025205334027608235
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,2,128,0,1,float16,float16,3,0.029450667401154835
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,2,128,0,1,float16,fp8,3,0.025487999121348064
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,2,128,0,1,float16,float16,7,0.029290666182835896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,2,128,0,1,float16,fp8,7,0.02537599951028824
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,2,128,0,1,float16,float16,15,0.02923733244339625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,fp8,131071,0.06189866860707601
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,2,128,0,1,float16,fp8,15,0.025466665625572205
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,2,128,0,1,float16,float16,131071,0.1009226640065511
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,2,128,0,1,float16,float16,31,0.029525332152843475
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,2,128,0,1,float16,fp8,31,0.02515733242034912
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,2,128,0,1,float16,float16,63,0.029296000798543293
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,2,128,0,1,float16,fp8,63,0.02526933451493581
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,2,128,0,1,float16,float16,127,0.029146666328112285
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,2,128,0,1,float16,float16,255,0.033226666351159416
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,2,128,0,1,float16,fp8,127,0.025221332907676697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,2,128,0,1,float16,fp8,255,0.025397333006064098
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,2,128,0,1,float16,float16,511,0.054005334774653115
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,2,128,0,1,float16,fp8,511,0.03946666667858759
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,2,128,0,1,float16,float16,1023,0.09249599774678548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,2,128,0,1,float16,fp8,1023,0.05707733333110809
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,2,128,0,1,float16,float16,1,0.04826133449872335
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,2,128,0,1,float16,fp8,2047,0.10063466429710388
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,2,128,0,1,float16,float16,2047,0.17625067631403604
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,2,128,0,1,float16,fp8,1,0.04197866717974345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,2,128,0,1,float16,float16,3,0.04950400193532308
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,2,128,0,1,float16,fp8,3,0.041749333341916404
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,2,128,0,1,float16,float16,7,0.04934399823347727
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,2,128,0,1,float16,fp8,7,0.04201066493988037
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,2,128,0,1,float16,float16,15,0.047685335079828896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,2,128,0,1,float16,fp8,15,0.041706666350364685
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,2,128,0,1,float16,float16,31,0.049551998575528465
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,2,128,0,1,float16,fp8,31,0.041519999504089355
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,2,128,0,1,float16,fp8,63,0.04162133236726125
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,2,128,0,1,float16,float16,63,0.049695998430252075
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,2,128,0,1,float16,float16,127,0.04996799925963084
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,2,128,0,1,float16,fp8,127,0.041850666205088295
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,float16,1,0.009136000027259191
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,fp8,1,0.0102613332370917
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,2,128,0,1,float16,float16,255,0.054117331902186074
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,float16,3,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,2,128,0,1,float16,fp8,255,0.04378666480382284
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,fp8,3,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,float16,7,0.01091733326514562
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,fp8,7,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,float16,15,0.0106133334338665
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,2,128,0,1,float16,float16,511,0.09393067161242168
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,fp8,15,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,2,128,0,1,float16,fp8,511,0.06427200138568878
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,2,128,0,1,float16,float16,1023,0.17045332988103232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,float16,31,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,fp8,31,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,float16,63,0.009093333035707474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,fp8,63,0.010608000059922537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,2,128,0,1,float16,fp8,1023,0.1018399993578593
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,float16,127,0.009216000015536943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,fp8,127,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,float16,255,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,float16,1023,0.010922666639089584
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,fp8,255,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,float16,511,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,fp8,511,0.011077333241701126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,fp8,1023,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,float16,2047,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,fp8,2047,0.014922666052977243
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,float16,4095,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,fp8,4095,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,float16,8191,0.02109333376089732
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,fp8,8191,0.01912533367673556
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,float16,16383,0.03868799904982249
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,fp8,16383,0.025045332809289295
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,float16,32767,0.05996266504128774
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,fp8,32767,0.039306665460268654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,float16,1,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,fp8,1,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,float16,3,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,fp8,3,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,float16,7,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,fp8,7,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,float16,65535,0.1011199951171875
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,fp8,65535,0.06225599845250448
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,float16,15,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,fp8,15,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,float16,31,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,fp8,31,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,float16,63,0.00897066667675972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,fp8,63,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,fp8,255,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,float16,127,0.010591999938090643
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,fp8,127,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,float16,255,0.010384000216921171
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,fp8,1023,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,float16,511,0.010656000425418219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,float16,131071,0.18822399775187174
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,fp8,511,0.011071999867757162
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,float16,1023,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,2,128,0,1,float16,fp8,131071,0.10705066720644633
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,float16,2047,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,float16,8191,0.018911999960740406
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,fp8,2047,0.014794666320085526
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,float16,16383,0.02477866659561793
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,float16,4095,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,fp8,4095,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,fp8,8191,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,fp8,16383,0.02309866746266683
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,float16,1,0.00890666681031386
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,float16,32767,0.041893333196640015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,fp8,32767,0.03128000100453695
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,fp8,1,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,float16,3,0.008986666798591614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,fp8,3,0.010949333508809408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,float16,7,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,fp8,7,0.010069333637754122
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,float16,15,0.00901333304742972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,fp8,15,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,float16,31,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,fp8,65535,0.05411200225353241
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,float16,65535,0.06464000046253204
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,fp8,31,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,float16,63,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,fp8,63,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,float16,127,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,fp8,127,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,float16,255,0.008837333569924036
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,fp8,255,0.0103946669648091
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,float16,511,0.010656000425418219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,fp8,511,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,float16,1023,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,fp8,1023,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,fp8,4095,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,float16,2047,0.009754666437705358
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,fp8,2047,0.009877333417534828
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,float16,4095,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,float16,131071,0.11349333326021831
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,float16,8191,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,1,1,64,0,1,float16,fp8,131071,0.09469866752624512
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,fp8,8191,0.014997333288192749
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,float16,16383,0.018933333456516266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,fp8,32767,0.02759466568628947
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,fp8,16383,0.019050666441520054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,float16,32767,0.027306665976842243
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,fp8,1,0.009941333283980688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,float16,1,0.009429333110650381
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,float16,3,0.008976000050703684
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,float16,65535,0.031210665901501972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,fp8,3,0.008901333436369896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,float16,7,0.009472000102202097
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,fp8,7,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,fp8,15,0.008736000085870424
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,fp8,65535,0.029322666426499683
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,float16,15,0.009583999713261923
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,float16,31,0.009269333134094873
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,fp8,31,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,float16,63,0.009359999870260557
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,float16,131071,0.03465600063403448
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,fp8,63,0.008976000050703684
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,1,1,64,0,1,float16,fp8,131071,0.03356266766786575
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,float16,127,0.009919999788204828
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,fp8,127,0.009130666653315226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,float16,255,0.009637333452701569
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,fp8,255,0.009375999992092451
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,float16,2047,0.010938666760921478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,fp8,2047,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,float16,511,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,fp8,511,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,float16,1023,0.01090666651725769
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,fp8,1023,0.010949333508809408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,float16,4095,0.010901333143313726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,float16,16383,0.01933866615096728
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,fp8,4095,0.011391999820868174
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,float16,8191,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,fp8,8191,0.015861333658297855
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,fp8,16383,0.019253333409627277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,float16,32767,0.02271999915440877
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,fp8,32767,0.023077333966890972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,float16,1,0.00926399976015091
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,float16,65535,0.02463999887307485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,float16,3,0.009029333169261614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,fp8,1,0.0099093330403169
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,fp8,65535,0.023786666492621105
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,fp8,3,0.010608000059922537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,float16,15,0.00895999992887179
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,float16,7,0.009002666920423508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,fp8,7,0.009898666913310686
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,fp8,15,0.010981333752473196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,float16,63,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,fp8,63,0.0100853331387043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,float16,31,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,fp8,127,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,float16,131071,0.02717866748571396
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,fp8,255,0.01098666712641716
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,fp8,31,0.010064000263810158
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,1,1,64,0,1,float16,fp8,131071,0.027232001225153606
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,float16,127,0.009072000160813332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,float16,255,0.009045333291093508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,float16,511,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,fp8,511,0.010656000425418219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,float16,1023,0.0129120002190272
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,fp8,1023,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,float16,2047,0.016805333395799
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,float16,8191,0.02513599892457326
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,fp8,2047,0.01482133318980535
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,float16,4095,0.018922666708628338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,fp8,4095,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,fp8,8191,0.022357332209746044
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,float16,16383,0.04178133110205332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,fp8,16383,0.031632001201311745
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,float16,1,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,fp8,1,0.008885333314538002
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,float16,32767,0.06543466448783875
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,float16,3,0.010533332824707031
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,fp8,32767,0.05394133428732554
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,fp8,3,0.009008000294367472
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,float16,7,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,fp8,7,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,float16,15,0.009018666421373686
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,fp8,15,0.00914666677514712
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,float16,31,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,fp8,31,0.010037333394090334
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,float16,63,0.009754666437705358
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,fp8,63,0.008943999807039896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,float16,127,0.008986666798591614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,fp8,127,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,float16,255,0.009237333511312803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,fp8,255,0.009045333291093508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,fp8,65535,0.09120532870292664
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,float16,511,0.01090666651725769
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,float16,65535,0.11101333300272624
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,fp8,511,0.010570666442314783
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,float16,1023,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,fp8,1023,0.010890666395425797
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,float16,2047,0.012015999605258306
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,fp8,2047,0.012928000340859095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,fp8,8191,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,float16,4095,0.012800000607967377
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,fp8,4095,0.012981332838535309
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,float16,8191,0.01685333376129468
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,float16,16383,0.019296000401178997
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,fp8,16383,0.01921066641807556
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,float16,32767,0.021312000850836437
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,fp8,32767,0.021029333273569744
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,float16,1,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,fp8,1,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,float16,65535,0.023530667026837666
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,float16,3,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,fp8,3,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,fp8,65535,0.021168000996112823
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,float16,7,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,fp8,7,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,float16,15,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,fp8,15,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,float16,31,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,float16,131071,0.20567999283472696
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,float16,63,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,fp8,31,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,float16,131071,0.029839999973773956
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,1,1,64,0,1,float16,fp8,131071,0.1692053278287252
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,1,1,64,0,1,float16,fp8,131071,0.02743999908367793
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,fp8,63,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,float16,127,0.010122666756312052
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,fp8,127,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,float16,255,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,fp8,255,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,float16,511,0.01101333275437355
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,float16,1023,0.012831999609867731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,fp8,511,0.01101333275437355
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,fp8,1023,0.012991999586423239
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,float16,2047,0.017317333569129307
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,fp8,2047,0.017093333105246227
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,float16,4095,0.023775999744733173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,fp8,4095,0.02309333284695943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,float16,8191,0.04013866682847341
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,fp8,8191,0.03120533376932144
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,fp8,16383,0.05542933444182078
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,float16,16383,0.06303466856479645
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,float16,1,0.011109333485364914
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,fp8,1,0.011722666521867117
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,float16,3,0.011066666493813196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,fp8,3,0.011866666376590729
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,float16,7,0.010965333630641302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,fp8,7,0.011594666788975397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,float16,15,0.011445333560307821
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,fp8,15,0.011989332735538483
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,float16,32767,0.10687466462453206
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,fp8,32767,0.09270399808883667
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,float16,31,0.010992000500361124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,float16,63,0.01173866664369901
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,fp8,31,0.01109333336353302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,fp8,63,0.010954666882753372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,float16,127,0.011114666859308878
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,fp8,127,0.011066666493813196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,float16,255,0.011781333635250727
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,fp8,255,0.010922666639089584
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,float16,511,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,fp8,511,0.012821332861979803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,float16,1023,0.015824000040690105
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,fp8,1023,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,float16,2047,0.024271999796231587
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,fp8,2047,0.023290666441122692
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,float16,4095,0.04131733377774557
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,fp8,4095,0.03327466547489166
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,float16,8191,0.06394133468468984
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,float16,1,0.014149333039919535
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,fp8,8191,0.056613331039746605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,fp8,1,0.013818666338920593
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,float16,3,0.013013333082199097
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,fp8,3,0.01309866706530253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,fp8,65535,0.16885334253311157
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,float16,7,0.014901333798964819
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,fp8,7,0.013370666652917862
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,float16,15,0.016016000260909397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,fp8,16383,0.09469333291053772
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,1,1,64,0,1,float16,float16,65535,0.1971893310546875
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,fp8,15,0.012784000486135483
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,1,1,64,0,1,float16,float16,16383,0.10795733332633972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,float16,31,0.014789332946141561
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,fp8,31,0.014069333672523499
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,float16,63,0.014943999548753103
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,fp8,63,0.012869333227475485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,float16,127,0.013045333325862885
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,fp8,127,0.01320533330241839
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,float16,255,0.01481066644191742
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,float16,511,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,fp8,255,0.013082666943470636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,fp8,1023,0.021194666624069214
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,fp8,511,0.017130666722853977
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,float16,1023,0.023045333723227184
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,float16,2047,0.04146666576464971
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,float16,1,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,fp8,2047,0.03331200033426285
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,fp8,1,0.010458666831254959
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,float16,3,0.009103999783595404
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,fp8,3,0.01091733326514562
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,float16,4095,0.06407466530799866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,fp8,4095,0.05749333401521047
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,float16,7,0.00877333308259646
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,fp8,7,0.009770666559537252
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,float16,15,0.00895999992887179
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,fp8,15,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,float16,31,0.009130666653315226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,fp8,31,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,float16,63,0.008896000062425932
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,fp8,63,0.010122666756312052
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,fp8,255,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,float16,8191,0.10946133732795715
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,1,1,64,0,1,float16,fp8,8191,0.09475200374921162
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,float16,127,0.00897066667675972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,fp8,127,0.010938666760921478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,float16,255,0.00916800027092298
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,float16,511,0.009488000224033991
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,fp8,511,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,float16,4095,0.014864000181357065
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,float16,1023,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,fp8,1023,0.011077333241701126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,float16,2047,0.011530666301647821
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,fp8,2047,0.01309866706530253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,fp8,4095,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,float16,8191,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,fp8,8191,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,float16,16383,0.019178666174411774
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,fp8,32767,0.020970667401949566
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,fp8,16383,0.01727466657757759
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,float16,32767,0.02179733415444692
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,fp8,65535,0.025072000920772552
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,float16,1,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,float16,65535,0.02717866748571396
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,fp8,1,0.017082666357358296
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,float16,3,0.017909333109855652
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,fp8,3,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,float16,7,0.017082666357358296
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,fp8,7,0.01685333376129468
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,float16,15,0.017136000096797943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,fp8,15,0.01709866647919019
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,float16,131071,0.04531733194986979
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,1,1,64,0,1,float16,fp8,131071,0.03544000039498011
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,float16,31,0.019018666197856266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,fp8,31,0.017008000363906223
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,float16,63,0.018144000321626663
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,fp8,63,0.0170666662355264
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,float16,127,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,fp8,127,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,float16,255,0.017263999829689663
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,fp8,255,0.01718933383623759
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,float16,511,0.0233599990606308
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,fp8,511,0.022090665996074677
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,float16,1023,0.03689600030581156
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,fp8,1023,0.029525332152843475
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,float16,2047,0.05993066728115082
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,fp8,2047,0.05364799996217092
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,1,1,64,0,1,float16,float16,1,0.027098665634791057
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,1,1,64,0,1,float16,float16,3,0.027280000348885853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,1,1,64,0,1,float16,fp8,1,0.025072000920772552
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,1,1,64,0,1,float16,fp8,3,0.025194667279720306
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,float16,4095,0.10106666882832845
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,1,1,64,0,1,float16,fp8,4095,0.0869653324286143
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,1,1,64,0,1,float16,float16,7,0.027130665878454845
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,1,1,64,0,1,float16,fp8,7,0.025125332176685333
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,1,1,64,0,1,float16,float16,15,0.025466665625572205
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,1,1,64,0,1,float16,fp8,15,0.025349333882331848
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,1,1,64,0,1,float16,float16,31,0.027029333015282948
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,1,1,64,0,1,float16,fp8,31,0.025173333783944447
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,1,1,64,0,1,float16,float16,63,0.027189334233601887
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,1,1,64,0,1,float16,fp8,63,0.025514667232831318
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,1,1,64,0,1,float16,float16,127,0.025461333493391674
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,1,1,64,0,1,float16,fp8,127,0.02513066679239273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,1,1,64,0,1,float16,float16,255,0.025514667232831318
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,float16,1,0.009008000294367472
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,1,1,64,0,1,float16,fp8,255,0.02475200096766154
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,fp8,1,0.009008000294367472
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,1,1,64,0,1,float16,fp8,511,0.03342933456103007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,1,1,64,0,1,float16,float16,511,0.040074666341145836
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,float16,3,0.009018666421373686
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,fp8,3,0.008997333546479544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,float16,7,0.010501333822806677
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,fp8,7,0.010970667004585266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,float16,15,0.008874666566650072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,fp8,15,0.008890666688481966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,float16,31,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,1,1,64,0,1,float16,float16,1023,0.060032000144322716
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,fp8,31,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,1,1,64,0,1,float16,fp8,1023,0.053957333167394005
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,float16,63,0.00895999992887179
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,fp8,63,0.00895999992887179
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,float16,127,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,fp8,127,0.009103999783595404
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,float16,255,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,1,1,64,0,1,float16,float16,2047,0.1032373309135437
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,fp8,255,0.008954666554927826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,float16,511,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,fp8,511,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,float16,1023,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,1,1,64,0,1,float16,fp8,2047,0.09059733152389526
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,fp8,1023,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,float16,2047,0.01479999969402949
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,fp8,2047,0.01462399959564209
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,float16,4095,0.01709866647919019
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,fp8,4095,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,float16,8191,0.01801066721479098
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,fp8,8191,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,float16,16383,0.02109866589307785
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,fp8,16383,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,fp8,32767,0.023306667804718018
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,float16,1,0.008976000050703684
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,float16,32767,0.025839999318122864
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,float16,3,0.009258666386206945
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,fp8,1,0.010431999961535135
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,float16,65535,0.041797334949175514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,fp8,3,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,float16,7,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,fp8,65535,0.03333866596221924
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,fp8,7,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,float16,15,0.010469333579142889
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,fp8,15,0.010645333677530289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,float16,31,0.010501333822806677
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,fp8,31,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,float16,63,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,fp8,63,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,float16,127,0.010656000425418219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,fp8,255,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,fp8,131071,0.056032001972198486
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,fp8,127,0.009770666559537252
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,1,1,64,0,1,float16,float16,131071,0.06534933547178905
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,float16,255,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,float16,511,0.011055999745925268
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,fp8,511,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,float16,1023,0.012149333953857422
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,fp8,1023,0.011178666104873022
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,float16,2047,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,float16,8191,0.02139200021823247
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,fp8,2047,0.015285332997639975
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,fp8,4095,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,float16,4095,0.017653333644072216
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,fp8,8191,0.019989332805077236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,float16,16383,0.03973866750796636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,fp8,16383,0.023472001155217487
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,float16,32767,0.06007466713587443
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,fp8,32767,0.04117333392302195
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,float16,1,0.008810666700204214
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,fp8,1,0.009365333244204521
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,float16,3,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,fp8,3,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,float16,7,0.009621333330869675
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,fp8,7,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,float16,15,0.009717333440979322
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,fp8,15,0.009029333169261614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,float16,65535,0.10307733217875163
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,float16,31,0.009888000165422758
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,fp8,31,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,float16,63,0.00871999996403853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,fp8,65535,0.06262399752934773
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,fp8,63,0.00980266680320104
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,float16,127,0.010922666639089584
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,fp8,127,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,float16,255,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,fp8,255,0.00890666681031386
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,float16,511,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,fp8,511,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,float16,1023,0.011114666859308878
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,float16,4095,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,fp8,1023,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,float16,2047,0.011018666128317514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,fp8,2047,0.011071999867757162
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,fp8,131071,0.10912533601125081
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,fp8,4095,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,float16,8191,0.015034666905800501
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,fp8,8191,0.015482666591803232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,float16,32767,0.027429332335789997
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,float16,16383,0.01924266666173935
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,1,1,128,0,1,float16,float16,131071,0.18931732575098673
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,fp8,16383,0.019253333409627277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,fp8,32767,0.02784000088771184
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,float16,1,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,float16,65535,0.03169599920511246
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,fp8,65535,0.03130666663249334
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,fp8,1,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,float16,3,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,fp8,3,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,float16,7,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,fp8,7,0.010117333382368088
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,float16,15,0.008805333326260248
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,fp8,15,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,fp8,63,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,float16,127,0.009077333534757296
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,float16,131071,0.03572800010442734
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,float16,31,0.009114666531483332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,fp8,31,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,float16,511,0.010949333508809408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,1,1,128,0,1,float16,fp8,131071,0.033717334270477295
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,float16,63,0.00895999992887179
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,fp8,127,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,float16,2047,0.01109333336353302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,fp8,2047,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,float16,4095,0.012784000486135483
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,float16,255,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,fp8,255,0.010480000327030817
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,fp8,511,0.01044800008336703
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,float16,1023,0.011066666493813196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,fp8,1023,0.011055999745925268
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,fp8,4095,0.01110400011142095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,float16,8191,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,fp8,8191,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,float16,16383,0.021242665747801464
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,fp8,16383,0.020949333906173706
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,float16,32767,0.02334933231274287
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,fp8,32767,0.023141334454218548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,fp8,1,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,float16,65535,0.027274665733178455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,float16,1,0.010949333508809408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,fp8,3,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,fp8,65535,0.02457600086927414
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,float16,3,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,float16,7,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,fp8,7,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,float16,15,0.009381333366036415
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,fp8,31,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,float16,131071,0.0313226655125618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,fp8,15,0.011152000476916632
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,float16,31,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,float16,63,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,fp8,63,0.010351999973257383
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,fp8,255,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,1,1,128,0,1,float16,fp8,131071,0.02741866558790207
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,float16,127,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,fp8,127,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,float16,255,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,float16,511,0.011445333560307821
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,fp8,511,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,float16,1023,0.012762666990359625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,fp8,1023,0.011461333682139715
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,float16,2047,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,fp8,2047,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,float16,4095,0.02096533278624217
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,fp8,4095,0.019333332777023315
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,float16,8191,0.03974399964014689
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,fp8,8191,0.024154665569464367
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,float16,16383,0.0601440022389094
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,fp8,16383,0.04009599983692169
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,float16,1,0.009637333452701569
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,float16,32767,0.1032426655292511
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,fp8,1,0.009786666681369146
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,fp8,32767,0.06280000011126201
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,float16,3,0.00973866693675518
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,fp8,3,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,float16,7,0.010112000008424124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,fp8,7,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,float16,15,0.010064000263810158
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,fp8,15,0.010992000500361124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,float16,31,0.0100853331387043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,fp8,31,0.009952000031868616
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,float16,63,0.010399999717871347
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,fp8,63,0.010656000425418219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,fp8,65535,0.10565867026646932
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,float16,127,0.010885333021481832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,fp8,127,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,float16,65535,0.18583999077479044
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,float16,255,0.010090666512648264
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,fp8,255,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,float16,511,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,fp8,511,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,float16,1023,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,fp8,1023,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,float16,2047,0.013232000172138214
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,fp8,2047,0.012906666845083237
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,float16,4095,0.013232000172138214
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,fp8,4095,0.012901333471139273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,float16,16383,0.021018666525681812
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,float16,8191,0.01877333347996076
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,fp8,8191,0.018309333672126133
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,fp8,16383,0.021226666867733
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,float16,32767,0.02274666726589203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,fp8,32767,0.02109333376089732
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,float16,1,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,float16,65535,0.027077332139015198
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,fp8,1,0.010938666760921478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,fp8,65535,0.023567999402681988
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,float16,3,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,fp8,3,0.011114666859308878
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,float16,7,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,fp8,7,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,float16,15,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,fp8,131071,0.19536532958348593
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,fp8,15,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,float16,31,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,fp8,31,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,float16,63,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,float16,127,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,1,1,128,0,1,float16,float16,131071,0.35603201389312744
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,float16,131071,0.04649066428343455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,1,1,128,0,1,float16,fp8,131071,0.03134933362404505
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,fp8,63,0.010949333508809408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,fp8,127,0.011173332730929056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,float16,255,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,fp8,255,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,float16,511,0.010954666882753372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,fp8,511,0.011066666493813196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,float16,1023,0.014106666048367819
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,fp8,1023,0.012757333616415659
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,float16,2047,0.02082666630546252
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,fp8,2047,0.018272000054518383
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,fp8,4095,0.023061332603295643
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,float16,4095,0.03948266555865606
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,float16,8191,0.060085331400235496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,fp8,8191,0.039408000806967415
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,float16,16383,0.10116266210873921
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,float16,1,0.011152000476916632
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,fp8,16383,0.0598826656738917
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,fp8,1,0.01109333336353302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,float16,3,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,fp8,3,0.011333333949247995
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,float16,7,0.012378666549921036
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,fp8,7,0.011578666667143503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,float16,15,0.012650666137536367
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,fp8,15,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,float16,32767,0.18478933970133463
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,float16,31,0.012698666503032049
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,fp8,31,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,fp8,32767,0.10293333729108174
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,float16,63,0.0120319997270902
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,fp8,63,0.011663999408483505
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,float16,127,0.012938667088747025
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,fp8,127,0.01091733326514562
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,float16,255,0.011306667079528173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,float16,511,0.015018666783968607
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,fp8,255,0.010970667004585266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,fp8,511,0.013306666165590286
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,float16,1023,0.01714666684468587
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,fp8,1023,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,float16,2047,0.039461334546407066
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,fp8,2047,0.024298667907714844
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,float16,4095,0.061978667974472046
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,fp8,4095,0.040591999888420105
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,float16,8191,0.10291733344395955
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,fp8,8191,0.0626933326323827
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,float16,1,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,float16,65535,0.3532960017522176
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,1,1,128,0,1,float16,fp8,65535,0.18795732657114664
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,fp8,1,0.014730667074521383
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,float16,3,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,fp8,3,0.01312000056107839
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,float16,16383,0.18650666872660318
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,float16,7,0.014954666296641031
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,1,1,128,0,1,float16,fp8,16383,0.10497066378593445
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,fp8,7,0.012890666723251343
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,fp8,15,0.013007999708255133
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,float16,15,0.014885333677132925
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,float16,31,0.014789332946141561
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,fp8,31,0.013130666067202887
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,float16,63,0.014730667074521383
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,fp8,63,0.013221333424250284
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,float16,127,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,fp8,127,0.013023999830087027
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,float16,255,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,fp8,255,0.013237333546082178
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,float16,511,0.019205333044131596
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,fp8,511,0.01714666684468587
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,float16,1023,0.03335466732581457
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,fp8,1023,0.022709332406520844
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,fp8,2047,0.041663999358812966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,float16,2047,0.06046399970849355
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,float16,1,0.00938666673998038
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,fp8,1,0.01033599985142549
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,fp8,3,0.010144000252087912
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,float16,3,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,float16,4095,0.10329066713651021
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,float16,15,0.009429333110650381
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,fp8,15,0.010597333312034607
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,float16,7,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,float16,31,0.00933333362142245
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,fp8,31,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,fp8,63,0.010938666760921478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,fp8,4095,0.06366933385531108
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,fp8,7,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,float16,63,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,float16,127,0.009813333551088968
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,fp8,8191,0.10656533638636272
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,1,1,128,0,1,float16,float16,8191,0.1869386633237203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,fp8,127,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,float16,255,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,fp8,255,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,float16,2047,0.013050666699806849
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,float16,511,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,fp8,511,0.01097600037852923
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,float16,1023,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,fp8,1023,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,fp8,2047,0.012831999609867731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,float16,4095,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,fp8,4095,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,float16,8191,0.018986667195955913
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,float16,32767,0.025306666890780132
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,fp8,8191,0.018885333091020584
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,float16,16383,0.020495999604463577
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,fp8,16383,0.019274666905403137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,fp8,32767,0.022384000321229298
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,float16,65535,0.043290664752324425
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,float16,1,0.01934933289885521
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,fp8,65535,0.027189334233601887
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,fp8,1,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,float16,3,0.01907733331123988
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,fp8,3,0.017935999979575474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,float16,7,0.018992000569899876
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,fp8,7,0.018581333259741466
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,fp8,131071,0.04586133360862732
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,float16,15,0.019199999670187633
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,fp8,15,0.017605333278576534
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,float16,31,0.019274666905403137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,fp8,31,0.017632000148296356
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,1,1,128,0,1,float16,float16,131071,0.06608533362547557
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,float16,63,0.019365333020687103
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,fp8,63,0.017162666966517765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,float16,127,0.019248000035683315
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,fp8,127,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,float16,255,0.018986667195955913
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,fp8,255,0.01741333305835724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,fp8,511,0.023290666441122692
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,float16,511,0.033488000432650246
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,float16,1023,0.0537066658337911
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,fp8,1023,0.035375999907652535
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,float16,2047,0.0977280040582021
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,fp8,2047,0.0598880002895991
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,1,1,128,0,1,float16,float16,1,0.02920000006755193
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,1,1,128,0,1,float16,fp8,1,0.02685333291689555
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,1,1,128,0,1,float16,float16,3,0.029205332199732464
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,float16,4095,0.17563732465108237
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,1,1,128,0,1,float16,fp8,4095,0.09879466891288757
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,1,1,128,0,1,float16,fp8,3,0.025402667621771496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,1,1,128,0,1,float16,float16,7,0.02920000006755193
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,1,1,128,0,1,float16,fp8,7,0.026314665873845417
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,1,1,128,0,1,float16,float16,15,0.029546665648619335
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,1,1,128,0,1,float16,fp8,15,0.02513599892457326
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,1,1,128,0,1,float16,float16,31,0.02924266705910365
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,1,1,128,0,1,float16,fp8,31,0.025301332275072735
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,1,1,128,0,1,float16,fp8,63,0.026543999711672466
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,1,1,128,0,1,float16,float16,63,0.029093332588672638
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,1,1,128,0,1,float16,float16,127,0.029370665550231934
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,1,1,128,0,1,float16,fp8,127,0.02678400029738744
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,1,1,128,0,1,float16,float16,255,0.03331200033426285
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,float16,1,0.009018666421373686
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,1,1,128,0,1,float16,fp8,255,0.02508266766866048
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,fp8,1,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,1,1,128,0,1,float16,fp8,511,0.0386613334218661
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,float16,3,0.009743999689817429
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,fp8,3,0.009482666850090027
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,1,1,128,0,1,float16,float16,511,0.054474666714668274
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,float16,7,0.00891733355820179
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,fp8,7,0.009717333440979322
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,float16,15,0.009125333279371262
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,float16,31,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,1,1,128,0,1,float16,float16,1023,0.09263466795285542
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,float16,63,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,1,1,128,0,1,float16,fp8,1023,0.0577706644932429
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,fp8,15,0.009733333562811216
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,fp8,31,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,fp8,63,0.010634666929642359
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,float16,127,0.010901333143313726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,1,1,128,0,1,float16,float16,2047,0.17681066195170084
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,fp8,127,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,float16,1023,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,1,1,128,0,1,float16,fp8,2047,0.10076266527175903
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,float16,255,0.009103999783595404
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,fp8,255,0.009093333035707474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,float16,511,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,fp8,511,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,fp8,1023,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,float16,2047,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,fp8,2047,0.01492799942692121
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,float16,4095,0.017269333203633625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,fp8,4095,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,float16,8191,0.019551999866962433
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,fp8,8191,0.019018666197856266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,float16,16383,0.023050665855407715
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,fp8,32767,0.02518933266401291
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,fp8,16383,0.020949333906173706
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,float16,32767,0.04177066683769226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,fp8,65535,0.041738669077555336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,float16,1,0.047925333182017006
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,float16,65535,0.06157866617043813
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,float16,1,0.047925333182017006
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,fp8,1,0.03972800076007843
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,float16,3,0.04808533191680908
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,fp8,1,0.03956266740957896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,fp8,3,0.039450667798519135
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,float16,7,0.049728001157442726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,float16,3,0.04795733094215393
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,fp8,7,0.041290665666262306
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,float16,7,0.04971733192602793
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,fp8,7,0.0415786678592364
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,fp8,3,0.039781334499518074
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,fp8,15,0.042026668787002563
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,float16,15,0.05177066723505656
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,float16,31,0.05991999804973602
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,float16,131071,0.10268266995747884
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,float16,15,0.05073066552480062
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,fp8,15,0.0417546679576238
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,1,1,128,0,1,float16,fp8,131071,0.0626453310251236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,float16,31,0.059994667768478394
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,fp8,31,0.053082664807637535
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,fp8,31,0.05386666456858317
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,float16,63,0.06032533446947733
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,fp8,63,0.053957333167394005
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,fp8,63,0.05384000142415365
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,float16,127,0.060266668597857155
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,fp8,127,0.05385066568851471
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,float16,63,0.060319999853769936
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,fp8,127,0.05379199981689453
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,float16,255,0.060218666990598045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,float16,255,0.07041066884994507
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,fp8,255,0.05388799806435903
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,fp8,255,0.06406933565934499
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,float16,127,0.05997333427270254
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,float16,511,0.0603359987338384
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,float16,511,0.10514666636784871
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,fp8,511,0.05381333331267039
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,fp8,511,0.09713600079218547
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,float16,1023,0.060378665725390114
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,float16,1023,0.17229332526524863
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,fp8,1023,0.05388266841570536
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,fp8,1023,0.1644426683584849
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,float16,2047,0.06030400097370148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,fp8,2047,0.053946668903032936
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,float16,2047,0.30939199527104694
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,float16,4095,0.06066133578618368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,fp8,2047,0.2999200026194255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,64,128,1,float16,fp8,4095,0.053727999329566956
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,float16,4095,0.5821066697438558
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,float16,1,0.04931733508904775
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,float16,1,0.048938666780789696
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,fp8,1,0.03969600051641464
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,64,0,1,float16,fp8,4095,0.5720586776733398
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,fp8,1,0.03938666731119156
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,float16,3,0.047925333182017006
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,float16,3,0.04820266862710317
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,fp8,7,0.04142933338880539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,fp8,3,0.03974399964014689
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,fp8,3,0.03979733337958654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,float16,7,0.050069332122802734
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,float16,7,0.05008533100287119
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,fp8,7,0.04141333450873693
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,float16,31,0.06018666426340739
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,fp8,31,0.05373866856098175
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,float16,15,0.051311999559402466
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,float16,63,0.06026133398214976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,float16,15,0.051632001996040344
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,fp8,15,0.042175998290379844
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,fp8,63,0.05379733443260193
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,fp8,15,0.043621331453323364
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,float16,127,0.060229331254959106
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,float16,31,0.06025066475073496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,fp8,31,0.05338666836420695
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,float16,63,0.06004266440868378
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,fp8,63,0.053861334919929504
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,float16,127,0.05994133154551188
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,fp8,255,0.06412800153096516
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,fp8,127,0.053871999184290566
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,fp8,127,0.05384000142415365
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,float16,255,0.06018133461475372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,float16,255,0.07067200044790904
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,fp8,255,0.054042667150497437
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,float16,511,0.06025599936644236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,float16,511,0.10519466797510783
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,fp8,1023,0.16470932960510254
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,fp8,511,0.05398400127887726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,fp8,511,0.09715200463930766
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,float16,1023,0.06032533446947733
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,float16,1023,0.17066667477289835
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,fp8,1023,0.053861334919929504
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,float16,2047,0.060234665870666504
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,fp8,2047,0.053823997577031456
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,float16,2047,0.3079520066579183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,float16,4095,0.061936000982920326
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,fp8,2047,0.2999093333880107
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,64,128,1,float16,fp8,4095,0.05395199855168661
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,float16,1,0.04810666541258494
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,float16,4095,0.5824533303578695
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,float16,1,0.0484746644894282
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,64,0,1,float16,fp8,4095,0.5724320014317831
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,fp8,3,0.03973866750796636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,fp8,1,0.0394400010506312
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,fp8,1,0.03977599988381068
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,float16,3,0.04970666766166687
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,float16,3,0.048026666045188904
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,fp8,3,0.03977599988381068
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,float16,7,0.050000001986821495
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,float16,7,0.04979733129342397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,fp8,7,0.04170133173465729
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,float16,31,0.06018666426340739
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,fp8,7,0.0414986660083135
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,float16,15,0.0517439991235733
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,float16,15,0.05176533261934916
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,fp8,15,0.041690667470296226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,fp8,15,0.041893333196640015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,float16,31,0.06021333237489065
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,fp8,31,0.05380799869696299
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,fp8,31,0.05376533170541128
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,float16,63,0.06016000111897787
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,fp8,127,0.05397333204746246
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,fp8,127,0.05380799869696299
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,float16,63,0.06032533446947733
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,fp8,63,0.05377600093682607
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,fp8,63,0.05393599967161814
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,float16,127,0.06026133398214976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,float16,127,0.060319999853769936
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,float16,255,0.06022400160630544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,fp8,511,0.053770666321118675
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,float16,255,0.07042133311430614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,fp8,255,0.054042667150497437
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,fp8,255,0.06403733293215434
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,float16,511,0.060549333691596985
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,float16,511,0.10523200035095215
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,fp8,511,0.09739733735720317
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,float16,1023,0.060415998101234436
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,float16,1023,0.17091200749079385
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,fp8,2047,0.054101333022117615
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,fp8,1023,0.0540533314148585
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,fp8,2047,0.30059732993443805
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,fp8,1023,0.16455466548601785
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,float16,2047,0.06035199761390686
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,fp8,4095,0.053802669048309326
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,float16,2047,0.30802132685979206
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,64,128,1,float16,float16,4095,0.060346667965253196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,float16,1,0.01292266696691513
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,float16,4095,0.5900106827418009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,float16,1,0.012917333592971167
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,fp8,1,0.012789333860079447
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,64,0,1,float16,fp8,4095,0.5724800030390421
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,fp8,1,0.012853333105643591
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,float16,3,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,float16,3,0.012800000607967377
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,fp8,3,0.01303999995191892
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,fp8,3,0.012821332861979803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,float16,7,0.012869333227475485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,float16,7,0.013023999830087027
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,fp8,7,0.012810666114091873
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,fp8,7,0.012762666990359625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,float16,15,0.012890666723251343
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,float16,15,0.012784000486135483
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,fp8,15,0.0129120002190272
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,fp8,15,0.012847999731699625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,float16,63,0.01293333371480306
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,float16,63,0.012837332983811697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,float16,31,0.012997332960367203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,float16,31,0.013082666943470636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,fp8,31,0.012831999609867731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,fp8,31,0.012810666114091873
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,fp8,63,0.012853333105643591
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,fp8,63,0.012789333860079447
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,float16,127,0.012885333349307379
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,float16,127,0.01302933320403099
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,fp8,127,0.013066666821638743
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,fp8,127,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,float16,255,0.012752000242471695
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,float16,255,0.012853333105643591
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,fp8,255,0.013088000317414602
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,fp8,255,0.013269333789745966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,float16,511,0.012869333227475485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,float16,511,0.01482133318980535
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,fp8,511,0.013066666821638743
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,fp8,511,0.014874666929244995
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,float16,1023,0.013045333325862885
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,fp8,2047,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,fp8,2047,0.025439999997615814
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,float16,1023,0.01728533332546552
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,fp8,1023,0.012784000486135483
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,float16,4095,0.04383466641108195
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,fp8,1023,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,float16,2047,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,float16,2047,0.027061333258946735
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,float16,4095,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,fp8,1,0.009098666409651438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,64,128,1,float16,fp8,4095,0.01523200049996376
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,64,0,1,float16,fp8,4095,0.03734933336575826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,float16,1,0.008762666955590248
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,float16,1,0.008613333106040955
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,fp8,1,0.008986666798591614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,float16,3,0.008821333448092142
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,float16,3,0.008746666833758354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,fp8,3,0.009226666763424873
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,fp8,3,0.010117333382368088
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,float16,7,0.008757333581646284
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,float16,7,0.00878399983048439
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,fp8,7,0.008821333448092142
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,fp8,7,0.00903466654320558
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,float16,15,0.008693333094318708
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,float16,15,0.009626666704813639
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,fp8,15,0.008954666554927826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,fp8,15,0.009317333499590555
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,float16,31,0.009066666786869368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,float16,31,0.009152000149091085
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,fp8,31,0.00898133342464765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,float16,127,0.009072000160813332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,fp8,31,0.009189333145817121
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,fp8,127,0.012661332885424295
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,fp8,127,0.012863999853531519
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,float16,63,0.008821333448092142
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,float16,63,0.009125333279371262
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,fp8,63,0.010992000500361124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,fp8,63,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,float16,127,0.009152000149091085
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,float16,255,0.009152000149091085
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,float16,255,0.008922666932145754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,fp8,255,0.012757333616415659
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,fp8,255,0.012890666723251343
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,float16,511,0.019343999524911244
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,float16,511,0.020992000897725422
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,fp8,511,0.012661332885424295
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,fp8,511,0.012879999975363413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,float16,1023,0.019029332945744198
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,float16,1023,0.02325333406527837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,fp8,1023,0.023376000424226124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,fp8,1023,0.027221334477265675
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,float16,2047,0.019173332800467808
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,float16,2047,0.02942933390537898
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,fp8,2047,0.02306666721900304
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,fp8,2047,0.031328000128269196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,float16,4095,0.01911466692884763
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,float16,4095,0.0417546679576238
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,fp8,8191,0.023215999205907185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,fp8,4095,0.023205332458019257
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,fp8,4095,0.04159466673930486
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,float16,8191,0.019258666783571243
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,float16,8191,0.06428800026575725
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,fp8,8191,0.06019733349482218
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,float16,16383,0.018976000448067982
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,float16,16383,0.11115733782450359
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,64,128,1,float16,fp8,16383,0.02317333221435547
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,float16,1,0.008725333337982496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,float16,3,0.009045333291093508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,float16,1,0.00897066667675972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,64,0,1,float16,fp8,16383,0.09917333722114563
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,fp8,1,0.009119999905427298
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,fp8,1,0.00903466654320558
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,float16,3,0.00855466661353906
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,fp8,3,0.009125333279371262
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,fp8,3,0.009472000102202097
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,float16,7,0.008687999720374743
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,float16,7,0.00871999996403853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,fp8,7,0.009082666908701261
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,fp8,7,0.008933333059151968
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,float16,15,0.008703999842206636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,float16,15,0.008650666723648706
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,fp8,15,0.009029333169261614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,fp8,15,0.008933333059151968
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,float16,31,0.009061333412925402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,float16,31,0.008618666479984919
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,fp8,31,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,fp8,31,0.010533332824707031
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,float16,63,0.009162666896979014
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,fp8,127,0.012869333227475485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,float16,63,0.009050666665037474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,fp8,63,0.01097600037852923
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,fp8,63,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,float16,127,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,float16,127,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,fp8,127,0.012666666259368261
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,float16,255,0.00897066667675972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,float16,255,0.009098666409651438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,fp8,255,0.012682666381200155
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,fp8,255,0.012719999998807907
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,float16,511,0.018944000204404194
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,float16,511,0.021242665747801464
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,fp8,511,0.012874666601419449
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,fp8,511,0.012725333372751871
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,float16,1023,0.019306667149066925
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,float16,1023,0.0233599990606308
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,fp8,1023,0.023215999205907185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,fp8,1023,0.027450665831565857
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,float16,2047,0.019248000035683315
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,float16,2047,0.02951466788848241
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,fp8,2047,0.023056000471115112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,fp8,2047,0.031504000226656594
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,float16,4095,0.01903466631968816
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,float16,4095,0.04182933270931244
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,fp8,8191,0.06006399790445963
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,fp8,4095,0.023103999594847362
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,float16,16383,0.01923199991385142
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,fp8,4095,0.041834667325019836
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,float16,8191,0.01915733392039935
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,float16,8191,0.06432533264160156
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,fp8,8191,0.0233599990606308
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,64,128,1,float16,fp8,16383,0.02309866746266683
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,float16,16383,0.11124266187349956
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,float16,1,0.008821333448092142
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,64,0,1,float16,fp8,16383,0.09900266925493877
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,float16,1,0.008885333314538002
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,fp8,1,0.009045333291093508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,fp8,1,0.00927466650803884
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,float16,3,0.0086666668454806
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,float16,3,0.00873066671192646
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,fp8,3,0.009109333157539368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,fp8,3,0.008992000172535578
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,float16,7,0.008736000085870424
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,float16,7,0.008746666833758354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,fp8,7,0.008992000172535578
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,fp8,7,0.009743999689817429
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,float16,15,0.00892800030608972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,float16,15,0.008778666456540426
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,fp8,15,0.00903466654320558
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,fp8,15,0.009039999917149544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,float16,31,0.008826666822036108
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,float16,31,0.00867733359336853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,fp8,31,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,fp8,31,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,float16,63,0.008901333436369896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,float16,63,0.00901333304742972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,fp8,63,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,fp8,63,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,float16,127,0.009061333412925402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,float16,127,0.009002666920423508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,fp8,127,0.012741333494583765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,fp8,127,0.012719999998807907
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,float16,255,0.009008000294367472
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,float16,255,0.009098666409651438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,fp8,255,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,fp8,255,0.012719999998807907
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,float16,511,0.019194666296243668
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,float16,511,0.021002667645613354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,fp8,511,0.012709333250919977
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,fp8,511,0.012661332885424295
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,float16,1023,0.01905599981546402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,float16,2047,0.02958400050799052
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,float16,1023,0.023141334454218548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,fp8,1023,0.023007998863856
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,fp8,1023,0.02752000093460083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,float16,2047,0.019280000279347103
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,fp8,2047,0.02350933353106181
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,fp8,4095,0.04179200033346812
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,fp8,2047,0.031386665999889374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,float16,4095,0.018933333456516266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,float16,4095,0.04146133363246918
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,float16,16383,0.019029332945744198
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,fp8,4095,0.023050665855407715
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,float16,8191,0.01907733331123988
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,float16,8191,0.0642986645301183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,fp8,8191,0.023103999594847362
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,fp8,8191,0.0603359987338384
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,float16,16383,0.1104693313439687
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,64,128,1,float16,fp8,16383,0.02334933231274287
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,64,0,1,float16,fp8,16383,0.09916266798973083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,float16,1,0.010901333143313726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,float16,1,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,fp8,1,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,fp8,1,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,float16,3,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,float16,3,0.00898133342464765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,fp8,3,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,fp8,3,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,float16,7,0.010965333630641302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,float16,7,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,fp8,7,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,fp8,7,0.011130666981140772
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,float16,15,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,float16,15,0.00901333304742972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,fp8,15,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,fp8,15,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,float16,31,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,float16,31,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,fp8,31,0.01091733326514562
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,fp8,31,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,float16,63,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,float16,63,0.00891733355820179
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,fp8,63,0.01108266661564509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,float16,255,0.009397333487868309
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,fp8,63,0.01098666712641716
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,fp8,255,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,float16,127,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,float16,127,0.010911999891201654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,fp8,127,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,fp8,127,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,float16,255,0.008986666798591614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,fp8,255,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,float16,511,0.010656000425418219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,float16,511,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,fp8,511,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,fp8,511,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,float16,1023,0.010922666639089584
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,float16,1023,0.010954666882753372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,fp8,1023,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,float16,4095,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,fp8,1023,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,float16,2047,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,float16,2047,0.010949333508809408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,fp8,2047,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,fp8,2047,0.011066666493813196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,float16,4095,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,fp8,4095,0.010656000425418219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,fp8,4095,0.012975999464591345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,float16,8191,0.01044800008336703
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,float16,8191,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,fp8,8191,0.011215999722480774
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,fp8,8191,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,float16,16383,0.011829332758982977
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,fp8,1,0.008762666955590248
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,float16,16383,0.017562666287024815
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,64,128,1,float16,fp8,16383,0.010618666807810465
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,float16,1,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,64,0,1,float16,fp8,16383,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,float16,1,0.012800000607967377
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,fp8,1,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,float16,3,0.012842666357755661
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,float16,3,0.012981332838535309
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,fp8,3,0.00901333304742972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,fp8,3,0.008842666943868002
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,float16,7,0.012874666601419449
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,float16,7,0.013002666334311167
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,fp8,7,0.00901333304742972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,fp8,7,0.008976000050703684
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,float16,15,0.012954667210578918
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,float16,15,0.012896000097195307
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,fp8,15,0.009061333412925402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,fp8,15,0.009066666786869368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,float16,31,0.013157332936922709
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,float16,31,0.01303999995191892
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,fp8,31,0.010645333677530289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,fp8,31,0.008863999818762144
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,float16,63,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,float16,63,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,fp8,63,0.011445333560307821
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,fp8,63,0.011226666470368704
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,float16,127,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,float16,127,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,fp8,127,0.011152000476916632
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,fp8,127,0.011253333340088526
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,float16,255,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,float16,511,0.016762666404247284
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,float16,255,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,fp8,255,0.011114666859308878
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,fp8,255,0.012666666259368261
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,float16,511,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,fp8,511,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,fp8,1023,0.01911466692884763
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,float16,2047,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,float16,2047,0.033370666205883026
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,fp8,511,0.014080000420411428
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,float16,1023,0.015263999501864115
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,float16,1023,0.023082666099071503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,float16,4095,0.054133335749308266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,fp8,1023,0.011130666981140772
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,fp8,2047,0.012784000486135483
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,fp8,2047,0.029215998947620392
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,float16,4095,0.014970666418472925
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,fp8,4095,0.011152000476916632
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,fp8,4095,0.04956266780694326
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,float16,8191,0.015253332753976187
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,float16,8191,0.09478400150934856
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,fp8,8191,0.011055999745925268
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,fp8,8191,0.08858666817347209
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,float16,16383,0.014981333166360855
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,float16,16383,0.17676266034444174
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,64,128,1,float16,fp8,16383,0.010933333386977514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,float16,1,0.013141332815090815
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,64,0,1,float16,fp8,16383,0.16673066218694052
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,fp8,3,0.009077333534757296
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,float16,1,0.012826666235923767
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,fp8,1,0.009045333291093508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,fp8,1,0.009098666409651438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,float16,3,0.013013333082199097
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,float16,3,0.012949333836634954
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,fp8,3,0.008853333070874214
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,float16,7,0.013125333935022354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,float16,7,0.013125333935022354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,fp8,7,0.009039999917149544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,fp8,7,0.008842666943868002
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,float16,15,0.012970666090647379
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,float16,15,0.012954667210578918
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,fp8,15,0.008805333326260248
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,fp8,15,0.00892800030608972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,float16,31,0.013178666432698568
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,float16,31,0.013125333935022354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,fp8,31,0.00901333304742972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,fp8,31,0.011226666470368704
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,float16,63,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,float16,63,0.015008000036080679
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,fp8,63,0.01259200026591619
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,fp8,63,0.012746666868527731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,float16,127,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,float16,127,0.01482133318980535
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,fp8,127,0.011136000355084738
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,fp8,127,0.012879999975363413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,float16,255,0.014901333798964819
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,float16,255,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,fp8,255,0.012800000607967377
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,fp8,255,0.012719999998807907
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,float16,511,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,float16,511,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,fp8,511,0.01118933285276095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,fp8,511,0.01309866706530253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,float16,1023,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,float16,1023,0.02314666658639908
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,fp8,1023,0.012773333738247553
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,fp8,1023,0.018944000204404194
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,float16,4095,0.05402666827042898
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,float16,2047,0.015237333873907724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,float16,2047,0.033344000577926636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,fp8,2047,0.011098666737476984
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,fp8,2047,0.02922666569550832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,fp8,8191,0.01091733326514562
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,float16,4095,0.01482133318980535
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,fp8,8191,0.08867200215657552
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,fp8,4095,0.013013333082199097
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,fp8,4095,0.049685334165891014
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,float16,8191,0.014864000181357065
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,float16,8191,0.09478400150934856
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,float16,16383,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,float16,16383,0.17722133795420328
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,64,128,1,float16,fp8,16383,0.010965333630641302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,float16,1,0.012906666845083237
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,64,0,1,float16,fp8,16383,0.16642666856447855
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,float16,1,0.013295999417702356
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,fp8,1,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,fp8,1,0.009018666421373686
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,float16,3,0.012901333471139273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,float16,3,0.012831999609867731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,fp8,3,0.008885333314538002
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,fp8,3,0.008976000050703684
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,float16,15,0.013546666751305262
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,float16,7,0.013199999928474426
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,float16,7,0.013077333569526672
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,fp8,7,0.009050666665037474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,fp8,7,0.009152000149091085
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,float16,15,0.013082666943470636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,fp8,15,0.00901333304742972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,fp8,15,0.00871999996403853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,float16,31,0.012960000584522883
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,float16,31,0.013077333569526672
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,fp8,31,0.008997333546479544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,fp8,31,0.009152000149091085
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,float16,63,0.014874666929244995
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,float16,63,0.014805333067973455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,fp8,63,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,fp8,63,0.012746666868527731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,float16,127,0.015040000279744467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,float16,127,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,fp8,127,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,fp8,127,0.011055999745925268
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,float16,511,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,float16,255,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,float16,255,0.015024000157912573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,fp8,255,0.012805332740147909
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,fp8,255,0.010970667004585266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,float16,511,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,fp8,1023,0.013045333325862885
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,fp8,511,0.011157333850860596
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,fp8,511,0.01301866645614306
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,float16,2047,0.0334346666932106
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,float16,1023,0.014874666929244995
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,float16,4095,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,float16,1023,0.02294933299223582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,fp8,4095,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,fp8,1023,0.018954666952292126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,float16,2047,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,fp8,2047,0.011136000355084738
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,fp8,2047,0.029701332251230877
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,float16,4095,0.054133335749308266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,fp8,4095,0.049327999353408813
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,float16,8191,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,float16,8191,0.09479999542236328
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,fp8,8191,0.011146667102972666
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,fp8,8191,0.08858666817347209
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,float16,16383,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,float16,16383,0.17671465873718262
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,64,128,1,float16,fp8,16383,0.012698666503032049
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,float16,1,0.00973866693675518
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,64,0,1,float16,fp8,16383,0.1666719913482666
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,float16,1,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,fp8,1,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,float16,7,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,fp8,1,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,float16,3,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,float16,3,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,fp8,3,0.011087999989589056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,fp8,3,0.011493333925803503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,float16,7,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,fp8,7,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,fp8,7,0.010965333630641302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,float16,15,0.010922666639089584
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,float16,15,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,fp8,15,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,fp8,15,0.010650667051474253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,float16,31,0.010559999694426855
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,float16,31,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,fp8,31,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,fp8,31,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,float16,63,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,float16,63,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,fp8,63,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,fp8,63,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,float16,127,0.010965333630641302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,float16,127,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,fp8,127,0.010970667004585266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,fp8,127,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,float16,255,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,float16,255,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,fp8,255,0.011066666493813196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,fp8,255,0.010629333555698395
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,float16,511,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,float16,511,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,fp8,511,0.010944000134865442
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,fp8,511,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,float16,2047,0.012837332983811697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,fp8,2047,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,float16,1023,0.010911999891201654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,float16,1023,0.010992000500361124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,fp8,1023,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,fp8,4095,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,fp8,1023,0.011018666128317514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,float16,2047,0.010970667004585266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,fp8,2047,0.013023999830087027
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,float16,4095,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,float16,4095,0.014858666807413101
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,fp8,4095,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,float16,8191,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,float16,8191,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,float16,16383,0.020128000527620316
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,fp8,8191,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,fp8,8191,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,float16,16383,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,64,128,1,float16,fp8,16383,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,64,0,1,float16,fp8,16383,0.01926400015751521
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,64,128,1,float16,float16,1,0.09058133761088054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,64,0,1,float16,float16,1,0.08946667114893596
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,64,128,1,float16,fp8,3,0.07261333366235097
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,64,128,1,float16,fp8,1,0.07239466905593872
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,64,0,1,float16,fp8,1,0.07254933317502339
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,64,128,1,float16,float16,3,0.09066133697827657
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,64,0,1,float16,float16,3,0.09071466326713562
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,64,0,1,float16,fp8,3,0.07261333366235097
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,64,128,1,float16,float16,7,0.09265066186587016
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,64,0,1,float16,float16,7,0.09273599584897359
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,64,128,1,float16,fp8,7,0.07735999921957652
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,64,0,1,float16,fp8,7,0.07632000247637431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,64,128,1,float16,float16,15,0.0949173370997111
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,64,0,1,float16,float16,15,0.09504533807436626
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,64,128,1,float16,fp8,15,0.07895466685295105
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,64,0,1,float16,fp8,15,0.07857066889603932
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,64,128,1,float16,float16,31,0.11316800117492676
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,64,0,1,float16,float16,31,0.11310933033625285
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,64,128,1,float16,fp8,31,0.09914132952690125
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,64,0,1,float16,fp8,31,0.09923733274141948
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,64,128,1,float16,float16,63,0.11366400122642517
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,64,0,1,float16,float16,63,0.11358400185902913
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,64,128,1,float16,fp8,63,0.10087999701499939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,64,0,1,float16,fp8,63,0.10097066561381023
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,64,128,1,float16,float16,127,0.11481600006421407
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,64,0,1,float16,float16,127,0.11468266447385152
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,64,128,1,float16,fp8,127,0.1011893351872762
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,64,0,1,float16,fp8,127,0.1011306643486023
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,64,128,1,float16,float16,255,0.11531733473141988
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,64,0,1,float16,float16,255,0.13577600320180258
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,64,128,1,float16,fp8,255,0.1009173293908437
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,64,0,1,float16,fp8,255,0.12148800492286682
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,64,128,1,float16,float16,511,0.1151626706123352
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,64,0,1,float16,float16,511,0.2015893260637919
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,64,128,1,float16,fp8,511,0.10116266210873921
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,64,0,1,float16,fp8,511,0.18891199429829916
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,64,128,1,float16,float16,1023,0.11532800396283467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,64,0,1,float16,float16,1023,0.33479468027750653
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,64,128,1,float16,fp8,1023,0.1011253297328949
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,64,0,1,float16,float16,2047,0.6070453325907389
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,64,0,1,float16,fp8,1023,0.32210665941238403
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,64,128,1,float16,fp8,1,0.07390399773915608
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,64,128,1,float16,float16,2047,0.11598933736483256
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,64,128,1,float16,fp8,2047,0.10097600022951762
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,64,128,1,float16,float16,1,0.0906880001227061
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,64,128,1,float16,fp8,3,0.07257600128650665
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,64,0,1,float16,float16,1,0.09063999851544698
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,64,0,1,float16,fp8,2047,0.59061332543691
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,64,0,1,float16,fp8,1,0.07250133156776428
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,64,128,1,float16,float16,3,0.09063999851544698
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,64,0,1,float16,float16,3,0.09064533313115437
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,64,128,1,float16,float16,15,0.09475732843081157
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,64,0,1,float16,fp8,3,0.07357333103815715
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,64,128,1,float16,float16,7,0.09311466415723164
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,64,128,1,float16,fp8,15,0.07871999839941661
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,64,0,1,float16,fp8,15,0.07869333525498708
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,64,0,1,float16,float16,7,0.09276266892751057
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,64,128,1,float16,fp8,7,0.07628266513347626
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,64,0,1,float16,fp8,7,0.07625066737333934
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,64,128,1,float16,float16,63,0.11326932907104492
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,64,0,1,float16,float16,15,0.09486933549245198
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,64,128,1,float16,float16,31,0.11321066816647847
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,64,0,1,float16,fp8,63,0.10089600086212158
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,64,0,1,float16,float16,31,0.11335466305414836
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,64,128,1,float16,fp8,31,0.09913067022959392
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,64,0,1,float16,float16,127,0.11544000109036763
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,64,0,1,float16,fp8,31,0.09925867120424907
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,64,0,1,float16,float16,63,0.11359467109044392
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,64,128,1,float16,fp8,63,0.10090133547782898
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,64,128,1,float16,float16,127,0.1151093343893687
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,64,128,1,float16,fp8,127,0.10089600086212158
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,64,0,1,float16,fp8,127,0.10131733616193135
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,64,128,1,float16,float16,255,0.11548266808191936
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,64,0,1,float16,float16,255,0.1357599993546804
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,64,128,1,float16,fp8,255,0.10094933708508809
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,64,0,1,float16,fp8,255,0.12184000015258789
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,64,128,1,float16,float16,511,0.11518399914105733
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,64,0,1,float16,float16,511,0.20245865980784097
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,64,128,1,float16,fp8,511,0.10090667009353638
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,64,0,1,float16,fp8,511,0.18935465812683105
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,64,128,1,float16,float16,1023,0.11517332990964253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,64,128,1,float16,fp8,1023,0.10099732875823975
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,64,128,1,float16,fp8,2047,0.10096533099810283
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,64,0,1,float16,float16,1023,0.33444265524546307
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,64,128,1,float16,float16,2047,0.11524800459543864
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,64,0,1,float16,fp8,1023,0.3222080071767171
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,64,128,1,float16,float16,1,0.09052800138791402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,64,0,1,float16,float16,2047,0.6088106632232666
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,64,0,1,float16,float16,1,0.09062400460243225
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,64,128,1,float16,fp8,1,0.07257066667079926
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,64,0,1,float16,fp8,2047,0.5930240154266357
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,64,0,1,float16,fp8,1,0.07425599793593089
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,64,128,1,float16,float16,3,0.09063999851544698
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,64,0,1,float16,float16,3,0.09076799949010213
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,64,128,1,float16,fp8,3,0.07437333464622498
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,64,0,1,float16,fp8,3,0.07425599793593089
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,64,128,1,float16,float16,7,0.09264000256856282
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,64,0,1,float16,float16,7,0.09353599945704143
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,64,128,1,float16,fp8,7,0.07655466596285503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,64,0,1,float16,fp8,7,0.07621866464614868
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,64,128,1,float16,float16,15,0.09514133135477702
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,64,0,1,float16,float16,15,0.09500799576441447
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,64,128,1,float16,fp8,15,0.07859733204046886
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,64,0,1,float16,fp8,15,0.07859200239181519
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,64,128,1,float16,float16,31,0.1139306624730428
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,64,0,1,float16,float16,31,0.11344533165295918
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,64,128,1,float16,fp8,31,0.0990666647752126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,64,0,1,float16,fp8,31,0.09988799691200256
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,64,128,1,float16,float16,63,0.11346133550008138
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,64,0,1,float16,float16,63,0.11357866724332173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,64,128,1,float16,fp8,63,0.10089600086212158
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,64,0,1,float16,fp8,63,0.10156266887982686
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,64,128,1,float16,float16,127,0.11532266934712727
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,64,0,1,float16,float16,127,0.11528000235557556
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,64,128,1,float16,fp8,127,0.10133866469065349
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,64,0,1,float16,fp8,127,0.10116799672444661
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,64,128,1,float16,float16,255,0.11528533697128296
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,64,0,1,float16,float16,255,0.13578133781750998
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,64,128,1,float16,fp8,255,0.10167466600735982
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,64,0,1,float16,fp8,255,0.12143466869990031
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,64,0,1,float16,fp8,511,0.18898133436838785
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,64,128,1,float16,float16,511,0.11520000298817952
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,64,0,1,float16,float16,511,0.20301334063212076
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,64,128,1,float16,fp8,511,0.10126933455467224
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,64,128,1,float16,float16,1023,0.11523200074831645
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,64,128,1,float16,fp8,1023,0.1011786659558614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,64,0,1,float16,float16,1023,0.3354880015055339
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,64,128,1,float16,float16,1,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,64,128,1,float16,float16,2047,0.11558399597803752
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,64,0,1,float16,fp8,1023,0.32209599018096924
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,64,128,1,float16,fp8,2047,0.10129066308339436
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,64,0,1,float16,float16,2047,0.6168533166249593
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,64,0,1,float16,float16,1,0.016208000481128693
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,64,128,1,float16,fp8,3,0.014864000181357065
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,64,0,1,float16,fp8,2047,0.5909119844436646
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,64,128,1,float16,float16,7,0.0161920003592968
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,64,128,1,float16,fp8,1,0.01589866727590561
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,64,0,1,float16,fp8,1,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,64,128,1,float16,float16,3,0.016330666840076447
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,64,128,1,float16,float16,15,0.016613333175579708
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,64,0,1,float16,float16,3,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,64,0,1,float16,fp8,3,0.015002666662136713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,64,0,1,float16,float16,7,0.015237333873907724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,64,128,1,float16,fp8,7,0.015013333410024643
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,64,0,1,float16,fp8,7,0.015263999501864115
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,64,128,1,float16,fp8,31,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,64,0,1,float16,float16,15,0.015743999431530636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,64,128,1,float16,fp8,15,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,64,0,1,float16,float16,63,0.016000000139077503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,64,0,1,float16,fp8,15,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,64,128,1,float16,float16,31,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,64,0,1,float16,float16,31,0.015034666905800501
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,64,0,1,float16,fp8,31,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,64,128,1,float16,float16,63,0.01525866612792015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,64,128,1,float16,fp8,63,0.015578666081031164
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,64,0,1,float16,fp8,63,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,64,128,1,float16,float16,127,0.016261332978804905
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,64,0,1,float16,float16,127,0.014901333798964819
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,64,128,1,float16,fp8,127,0.014922666052977243
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,64,0,1,float16,fp8,127,0.015002666662136713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,64,128,1,float16,float16,255,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,64,0,1,float16,float16,255,0.014943999548753103
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,64,0,1,float16,fp8,511,0.01889066646496455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,64,128,1,float16,fp8,255,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,64,0,1,float16,fp8,255,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,64,128,1,float16,fp8,1023,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,64,128,1,float16,float16,511,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,64,0,1,float16,float16,511,0.01926400015751521
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,64,128,1,float16,fp8,511,0.016528000434239704
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,64,128,1,float16,float16,1023,0.015669333438078564
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,64,0,1,float16,float16,1023,0.02644266684850057
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,64,0,1,float16,fp8,1023,0.02510933329661687
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,64,128,1,float16,float16,2047,0.017301333447297413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,fp8,1,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,64,0,1,float16,float16,2047,0.0451200008392334
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,64,128,1,float16,fp8,2047,0.01893866683046023
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,64,0,1,float16,fp8,2047,0.03810133288304011
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,float16,1,0.012837332983811697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,float16,1,0.013034666577974955
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,fp8,1,0.015018666783968607
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,float16,3,0.012805332740147909
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,float16,3,0.012800000607967377
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,fp8,3,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,fp8,3,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,float16,7,0.01293333371480306
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,float16,7,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,fp8,7,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,fp8,7,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,float16,15,0.012842666357755661
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,float16,15,0.0129120002190272
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,fp8,15,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,fp8,15,0.014922666052977243
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,float16,31,0.013237333546082178
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,float16,31,0.013194666554530462
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,fp8,31,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,fp8,31,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,float16,63,0.013258667041858038
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,float16,127,0.013264000415802002
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,float16,63,0.01303999995191892
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,fp8,63,0.019050666441520054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,fp8,63,0.01915733392039935
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,float16,127,0.01309866706530253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,fp8,127,0.01926400015751521
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,fp8,127,0.01922133316596349
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,float16,255,0.013253333667914072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,float16,255,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,fp8,255,0.019391999890406925
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,fp8,511,0.023445333043734234
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,fp8,255,0.01930133377512296
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,float16,511,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,float16,511,0.02089066555102666
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,fp8,511,0.019354666272799175
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,float16,1023,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,float16,2047,0.05263466636339823
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,float16,1023,0.03164800008138021
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,fp8,1023,0.019285333653291065
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,fp8,1023,0.03364266703526179
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,float16,4095,0.09505599737167358
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,float16,2047,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,fp8,2047,0.01915733392039935
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,fp8,2047,0.05188799897829691
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,float16,4095,0.013189333180586496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,fp8,4095,0.01918399954835574
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,fp8,4095,0.0906826655069987
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,float16,8191,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,float16,8191,0.17868266503016153
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,fp8,8191,0.019093333433071773
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,fp8,8191,0.1666719913482666
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,float16,16383,0.3529599905014038
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,float16,16383,0.013023999830087027
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,64,128,1,float16,fp8,16383,0.01922133316596349
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,float16,1,0.012906666845083237
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,float16,1,0.012826666235923767
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,fp8,1,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,64,0,1,float16,fp8,16383,0.3208000063896179
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,fp8,1,0.014815999815861383
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,float16,3,0.012896000097195307
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,float16,3,0.012842666357755661
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,fp8,3,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,fp8,3,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,float16,7,0.013162666310866674
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,float16,7,0.01310933381319046
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,fp8,7,0.014965333044528961
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,float16,31,0.013082666943470636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,fp8,7,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,float16,15,0.012917333592971167
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,float16,15,0.013007999708255133
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,fp8,15,0.015333333363135656
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,fp8,15,0.014783999572197596
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,float16,31,0.013173333058754602
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,fp8,31,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,fp8,31,0.015957333147525787
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,float16,63,0.013034666577974955
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,float16,63,0.013002666334311167
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,fp8,63,0.019194666296243668
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,fp8,63,0.019199999670187633
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,float16,127,0.013173333058754602
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,fp8,255,0.019173332800467808
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,float16,127,0.013178666432698568
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,fp8,127,0.019199999670187633
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,fp8,127,0.019296000401178997
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,float16,255,0.012960000584522883
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,float16,255,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,fp8,255,0.019546666493018467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,float16,511,0.01313599944114685
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,float16,511,0.021168000996112823
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,fp8,1023,0.03339733431736628
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,fp8,511,0.019317333896954853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,fp8,511,0.023071999351183575
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,float16,1023,0.014789332946141561
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,float16,1023,0.03148266673088074
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,fp8,1023,0.01930133377512296
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,float16,2047,0.013162666310866674
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,float16,2047,0.05279466509819031
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,fp8,4095,0.0900266667207082
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,fp8,2047,0.01937599976857503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,fp8,2047,0.05205333232879639
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,float16,4095,0.013290667285521826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,float16,4095,0.09473599990208943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,fp8,4095,0.019018666197856266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,float16,8191,0.013210666676362356
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,fp8,8191,0.019173332800467808
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,fp8,16383,0.019258666783571243
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,float16,1,0.01309866706530253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,float16,8191,0.17912532885869345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,fp8,8191,0.16673066218694052
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,64,128,1,float16,float16,16383,0.012917333592971167
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,float16,16383,0.35381333033243817
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,float16,1,0.01309866706530253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,fp8,3,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,64,0,1,float16,fp8,16383,0.3202986717224121
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,fp8,1,0.014864000181357065
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,fp8,1,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,float16,3,0.013061333447694778
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,float16,3,0.013167999684810638
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,fp8,3,0.01492799942692121
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,float16,7,0.012730666746695837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,float16,7,0.013072000195582708
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,fp8,7,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,fp8,7,0.01482133318980535
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,float16,15,0.013093333691358566
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,float16,15,0.012949333836634954
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,fp8,15,0.014767999450365702
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,fp8,15,0.014837333311637243
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,float16,31,0.013002666334311167
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,float16,31,0.013066666821638743
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,fp8,31,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,fp8,31,0.016480000068744022
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,float16,63,0.013237333546082178
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,float16,63,0.013050666699806849
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,fp8,127,0.01918399954835574
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,fp8,63,0.01911466692884763
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,fp8,63,0.019098666807015736
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,float16,127,0.013130666067202887
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,float16,127,0.013295999417702356
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,fp8,127,0.019146667172511418
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,float16,255,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,float16,255,0.015370666980743408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,fp8,255,0.019013332823912304
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,fp8,255,0.01926933353145917
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,float16,511,0.013167999684810638
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,float16,511,0.021002667645613354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,fp8,511,0.01932266727089882
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,fp8,511,0.023077333966890972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,float16,1023,0.014789332946141561
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,float16,1023,0.031498665610949196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,fp8,2047,0.052015999952952065
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,fp8,1023,0.01915733392039935
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,fp8,1023,0.033189333975315094
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,float16,2047,0.013189333180586496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,float16,2047,0.052101333936055504
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,fp8,2047,0.019343999524911244
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,float16,4095,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,float16,4095,0.09503466884295146
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,fp8,4095,0.01930133377512296
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,fp8,8191,0.166703999042511
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,fp8,4095,0.09061333537101746
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,float16,8191,0.013712000101804733
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,float16,8191,0.18083733320236206
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,fp8,8191,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,float16,16383,0.013194666554530462
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,64,128,1,float16,fp8,16383,0.019237333287795384
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,float16,16383,0.3563946485519409
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,float16,1,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,float16,1,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,64,0,1,float16,fp8,16383,0.3218826651573181
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,fp8,1,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,fp8,1,0.01089599976936976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,float16,3,0.010069333637754122
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,float16,3,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,fp8,3,0.01062400018175443
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,fp8,3,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,float16,7,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,float16,7,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,fp8,7,0.010565333068370819
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,float16,31,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,fp8,7,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,float16,15,0.009690666571259499
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,float16,15,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,fp8,15,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,fp8,15,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,fp8,63,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,float16,31,0.010640000303586325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,fp8,31,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,fp8,31,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,float16,63,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,float16,63,0.010213333492477735
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,fp8,63,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,float16,127,0.010058666889866194
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,float16,127,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,fp8,127,0.010949333508809408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,fp8,127,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,float16,255,0.010629333555698395
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,float16,255,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,fp8,255,0.010965333630641302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,fp8,255,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,float16,511,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,float16,511,0.011130666981140772
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,fp8,511,0.01098666712641716
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,fp8,511,0.012442667037248611
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,float16,1023,0.010645333677530289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,float16,1023,0.01110400011142095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,fp8,1023,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,fp8,1023,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,float16,2047,0.011146667102972666
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,float16,2047,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,fp8,2047,0.012986666212479273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,fp8,2047,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,float16,4095,0.011802667131026586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,float16,4095,0.015989333391189575
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,fp8,4095,0.013045333325862885
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,fp8,4095,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,float16,8191,0.012058666596810022
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,float16,8191,0.02000533292690913
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,fp8,8191,0.012741333494583765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,fp8,8191,0.01884799947341283
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,float16,16383,0.011898666620254517
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,float16,16383,0.027269333600997925
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,64,0,1,float16,float16,1,0.17299199104309082
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,64,128,1,float16,fp8,16383,0.012784000486135483
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,64,0,1,float16,fp8,1,0.1381119986375173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,64,0,1,float16,fp8,16383,0.02457600086927414
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,64,0,1,float16,float16,3,0.1732213298479716
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,64,128,1,float16,float16,1,0.17291200160980225
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,64,128,1,float16,fp8,1,0.13985066612561545
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,64,128,1,float16,float16,3,0.1730133295059204
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,64,128,1,float16,fp8,3,0.13939199844996134
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,64,0,1,float16,fp8,3,0.1388746698697408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,64,128,1,float16,float16,7,0.17881067593892416
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,64,128,1,float16,float16,15,0.1828213334083557
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,64,0,1,float16,float16,7,0.17853333552678427
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,64,128,1,float16,fp8,7,0.14432000120480856
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,64,0,1,float16,fp8,7,0.144186665614446
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,64,0,1,float16,float16,15,0.1827359994252523
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,64,128,1,float16,fp8,15,0.1520746648311615
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,64,0,1,float16,fp8,15,0.15108266472816467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,64,0,1,float16,fp8,31,0.19318399826685587
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,64,128,1,float16,float16,31,0.2197493314743042
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,64,0,1,float16,float16,31,0.21973333756128946
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,64,128,1,float16,fp8,31,0.19328000148137411
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,64,128,1,float16,float16,63,0.22170666853586832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,64,128,1,float16,fp8,63,0.19379200537999472
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,64,128,1,float16,fp8,127,0.1953493356704712
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,64,0,1,float16,float16,63,0.22182933489481607
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,64,0,1,float16,fp8,63,0.19338132937749228
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,64,128,1,float16,float16,127,0.2220053275426229
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,64,0,1,float16,float16,127,0.22381865978240967
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,64,0,1,float16,fp8,127,0.19518399238586426
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,64,128,1,float16,float16,511,0.22412800788879395
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,64,128,1,float16,float16,255,0.22401599089304605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,64,0,1,float16,float16,255,0.26480533679326373
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,64,128,1,float16,fp8,255,0.1954453388849894
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,64,0,1,float16,fp8,255,0.2343626618385315
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,64,128,1,float16,fp8,511,0.19507733980814615
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,64,128,1,float16,fp8,1023,0.19546133279800415
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,64,0,1,float16,float16,511,0.39607465267181396
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,64,0,1,float16,float16,1023,0.659770647684733
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,64,128,1,float16,float16,1,0.1728586753209432
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,64,0,1,float16,fp8,511,0.367520014444987
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,64,128,1,float16,float16,1023,0.22409600019454956
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,64,0,1,float16,float16,1,0.1731040080388387
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,64,0,1,float16,fp8,1023,0.6319733460744222
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,64,128,1,float16,fp8,1,0.13981866836547852
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,64,0,1,float16,fp8,3,0.14019733667373657
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,64,128,1,float16,float16,7,0.17882666985193887
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,64,0,1,float16,fp8,1,0.14018133282661438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,64,128,1,float16,float16,3,0.17285333077112833
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,64,0,1,float16,float16,3,0.1730239987373352
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,64,128,1,float16,fp8,3,0.14008532961209616
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,64,0,1,float16,float16,7,0.1790506641070048
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,64,128,1,float16,fp8,7,0.14602667093276978
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,64,0,1,float16,fp8,7,0.14595199624697366
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,64,128,1,float16,float16,15,0.18281066417694092
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,64,0,1,float16,float16,15,0.1829973260561625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,64,128,1,float16,fp8,15,0.1520960032939911
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,64,0,1,float16,fp8,15,0.15179199973742166
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,64,128,1,float16,float16,31,0.2198293407758077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,64,0,1,float16,float16,31,0.21974400679270426
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,64,128,1,float16,fp8,31,0.19335466623306274
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,64,0,1,float16,fp8,31,0.19339734315872192
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,64,128,1,float16,float16,63,0.22172266244888306
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,64,0,1,float16,float16,63,0.2220053275426229
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,64,128,1,float16,fp8,63,0.19507733980814615
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,64,0,1,float16,fp8,63,0.19379733006159464
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,64,128,1,float16,float16,127,0.2239840030670166
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,64,0,1,float16,float16,127,0.22381333510080972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,64,128,1,float16,fp8,127,0.1954186757405599
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,64,0,1,float16,fp8,127,0.19553067286809286
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,64,128,1,float16,float16,511,0.22406933705012003
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,64,128,1,float16,float16,255,0.22386133670806885
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,64,0,1,float16,float16,255,0.26476800441741943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,64,128,1,float16,fp8,255,0.19531200329462686
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,64,0,1,float16,fp8,255,0.23438400030136108
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,64,128,1,float16,fp8,511,0.19523199399312338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,64,0,1,float16,float16,511,0.39741865793863934
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,64,0,1,float16,fp8,511,0.3692320187886556
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,64,128,1,float16,float16,1023,0.22414400180180868
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,64,128,1,float16,fp8,1023,0.19543999433517456
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,64,0,1,float16,float16,1023,0.6581173340479533
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,64,128,1,float16,float16,1,0.17287466923395792
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,64,0,1,float16,float16,1,0.17324799299240112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,64,0,1,float16,fp8,1023,0.6318826675415039
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,64,128,1,float16,fp8,1,0.14012799660364786
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,64,0,1,float16,fp8,1,0.14020267128944397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,64,128,1,float16,float16,3,0.17293334007263184
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,64,0,1,float16,float16,7,0.17909334103266397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,64,0,1,float16,float16,3,0.17306133111317953
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,64,128,1,float16,fp8,3,0.1400373379389445
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,64,0,1,float16,fp8,3,0.14012799660364786
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,64,128,1,float16,float16,7,0.17908799648284912
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,64,128,1,float16,fp8,7,0.14607999722162882
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,64,0,1,float16,fp8,7,0.14593600233395895
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,64,128,1,float16,float16,15,0.1834239959716797
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,64,128,1,float16,float16,31,0.21971199909845987
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,64,0,1,float16,float16,15,0.18276800711949667
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,64,128,1,float16,fp8,15,0.15213333566983542
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,64,0,1,float16,fp8,15,0.15212800105412802
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,64,0,1,float16,float16,31,0.2201813260714213
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,64,0,1,float16,float16,63,0.22174400091171265
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,64,128,1,float16,fp8,31,0.1934559941291809
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,64,0,1,float16,fp8,31,0.19311465819676718
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,64,128,1,float16,float16,63,0.22180799643198648
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,64,128,1,float16,fp8,63,0.19373865922292074
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,64,0,1,float16,fp8,63,0.19507733980814615
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,64,128,1,float16,float16,127,0.2237493395805359
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,64,0,1,float16,float16,127,0.223797341187795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,64,128,1,float16,fp8,127,0.19546133279800415
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,64,0,1,float16,fp8,127,0.195360004901886
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,64,128,1,float16,float16,255,0.2238666613896688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,64,0,1,float16,float16,255,0.26472000281016034
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,64,128,1,float16,fp8,255,0.19514666001001993
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,64,0,1,float16,float16,511,0.3962186574935913
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,64,0,1,float16,fp8,255,0.23540266354878744
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,64,128,1,float16,float16,511,0.2241226633389791
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,64,128,1,float16,fp8,511,0.19545066356658936
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,64,0,1,float16,fp8,511,0.368064006169637
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,64,128,1,float16,float16,1023,0.2241333325703939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,64,128,1,float16,fp8,1023,0.19537067413330078
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,64,128,1,float16,float16,1,0.021301334102948506
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,64,0,1,float16,float16,1023,0.6745173136393229
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,64,0,1,float16,float16,1,0.021338666478792827
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,64,128,1,float16,fp8,1,0.0210506667693456
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,64,0,1,float16,fp8,1023,0.6322720050811768
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,64,0,1,float16,fp8,1,0.021114667256673176
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,64,128,1,float16,float16,3,0.021333334346612293
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,64,0,1,float16,float16,3,0.021205333371957142
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,64,128,1,float16,fp8,3,0.021114667256673176
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,64,128,1,float16,float16,7,0.021551998953024547
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,64,0,1,float16,fp8,3,0.02124800036350886
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,64,0,1,float16,float16,7,0.02110933264096578
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,64,128,1,float16,fp8,7,0.021173333128293354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,64,0,1,float16,fp8,7,0.020970667401949566
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,64,128,1,float16,float16,15,0.021674667795499165
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,64,0,1,float16,float16,15,0.021007999777793884
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,64,128,1,float16,fp8,15,0.02103466788927714
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,64,0,1,float16,fp8,15,0.021087999145189922
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,64,128,1,float16,float16,31,0.021045332153638203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,64,0,1,float16,float16,31,0.02110933264096578
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,64,128,1,float16,fp8,31,0.021301334102948506
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,64,0,1,float16,fp8,31,0.021168000996112823
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,64,128,1,float16,float16,63,0.021749332547187805
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,64,0,1,float16,float16,63,0.021290667355060577
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,64,128,1,float16,fp8,63,0.021274665991465252
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,64,0,1,float16,fp8,63,0.021317332983016968
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,64,128,1,float16,float16,127,0.02103466788927714
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,64,0,1,float16,float16,127,0.021338666478792827
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,64,128,1,float16,fp8,127,0.021312000850836437
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,64,0,1,float16,fp8,127,0.02096533278624217
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,64,128,1,float16,float16,255,0.021375998854637146
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,64,0,1,float16,float16,255,0.02102400114138921
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,64,128,1,float16,fp8,255,0.021013334393501282
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,64,0,1,float16,fp8,255,0.02091199904680252
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,64,128,1,float16,float16,511,0.02165333429972331
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,64,0,1,float16,float16,511,0.029264000554879505
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,64,128,1,float16,fp8,511,0.021290667355060577
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,64,0,1,float16,fp8,511,0.027242665489514668
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,64,128,1,float16,float16,1023,0.021029333273569744
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,64,0,1,float16,float16,1023,0.042090664307276406
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,64,128,1,float16,fp8,1023,0.021040000021457672
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,64,0,1,float16,fp8,1023,0.037445334096749626
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,1,64,128,1,float16,float16,1,0.34044798215230304
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,1,64,0,1,float16,float16,1,0.339466651280721
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,1,64,128,1,float16,fp8,1,0.27115732431411743
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,1,64,0,1,float16,fp8,1,0.27114667495091754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,1,64,128,1,float16,float16,3,0.3404853343963623
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,1,64,0,1,float16,float16,3,0.33980266253153485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,1,64,128,1,float16,fp8,3,0.2725759943326314
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,1,64,0,1,float16,fp8,3,0.2709439992904663
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,1,64,128,1,float16,float16,7,0.35072000821431476
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,1,64,0,1,float16,float16,7,0.35072000821431476
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,1,64,128,1,float16,fp8,7,0.28324800729751587
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,1,64,0,1,float16,fp8,7,0.2832213242848714
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,1,64,128,1,float16,float16,15,0.3585226535797119
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,1,64,128,1,float16,float16,31,0.432805339495341
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,1,64,0,1,float16,float16,15,0.3574133316675822
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,1,64,128,1,float16,fp8,15,0.29582399129867554
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,1,64,0,1,float16,fp8,15,0.2958293358484904
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,1,64,0,1,float16,float16,31,0.4326666593551636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,1,64,128,1,float16,float16,63,0.4367946783701579
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,1,64,128,1,float16,fp8,31,0.3796853224436442
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,1,64,0,1,float16,fp8,31,0.3803413311640422
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,1,64,0,1,float16,float16,63,0.43694400787353516
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,1,64,128,1,float16,fp8,63,0.3818560043970744
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,1,64,0,1,float16,fp8,63,0.3817760149637858
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,1,64,128,1,float16,float16,127,0.4394506613413493
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,1,64,0,1,float16,float16,127,0.43914135297139484
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,1,64,128,1,float16,fp8,127,0.38393068313598633
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,1,64,0,1,float16,fp8,127,0.3847413460413615
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,1,64,128,1,float16,float16,255,0.44150932629903156
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,1,64,0,1,float16,float16,255,0.521562655766805
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,1,64,128,1,float16,fp8,255,0.38389865557352704
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,1,64,0,1,float16,fp8,255,0.46325333913167316
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,2,64,128,1,float16,float16,1,0.33878934383392334
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,2,64,0,1,float16,float16,1,0.33877333005269367
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,2,64,128,1,float16,fp8,1,0.2733599940935771
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,2,64,0,1,float16,fp8,1,0.27297600110371906
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,2,64,128,1,float16,float16,3,0.339354674021403
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,2,64,0,1,float16,float16,3,0.33902935187021893
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,2,64,128,1,float16,fp8,3,0.27321066459019977
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,2,64,0,1,float16,fp8,3,0.27300800879796344
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,2,64,128,1,float16,float16,7,0.35069334506988525
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,2,64,0,1,float16,float16,7,0.35077333450317383
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,2,64,128,1,float16,fp8,7,0.28390934069951373
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,2,64,0,1,float16,fp8,7,0.28353599707285565
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,2,64,128,1,float16,float16,15,0.3588053385416667
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,2,64,0,1,float16,float16,15,0.358458677927653
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,2,64,128,1,float16,fp8,15,0.2958400050799052
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,2,64,0,1,float16,fp8,15,0.29574400186538696
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,2,64,128,1,float16,float16,31,0.43306132157643634
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,2,64,0,1,float16,float16,31,0.43317333857218426
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,2,64,128,1,float16,float16,63,0.43691734472910565
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,2,64,128,1,float16,fp8,31,0.3796000083287557
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,2,64,0,1,float16,fp8,31,0.37993065516153973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,2,64,0,1,float16,float16,63,0.4368533293406169
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,2,64,128,1,float16,fp8,63,0.3817439874013265
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,2,64,128,1,float16,float16,127,0.44065598646799725
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,2,64,0,1,float16,fp8,63,0.38178133964538574
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,2,64,0,1,float16,float16,127,0.4410826762517293
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,2,64,128,1,float16,fp8,127,0.38391466935475665
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,2,64,0,1,float16,fp8,127,0.3852800130844116
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,2,64,128,1,float16,float16,255,0.4411413272221883
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,2,64,128,1,float16,fp8,255,0.38546133041381836
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,2,64,0,1,float16,float16,255,0.5228426853815714
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,4,64,0,1,float16,float16,1,0.3405066728591919
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,2,64,0,1,float16,fp8,255,0.46326935291290283
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,4,64,128,1,float16,float16,1,0.3404373327891032
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,4,64,128,1,float16,fp8,1,0.2752799987792969
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,4,64,0,1,float16,fp8,1,0.27534933884938556
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,4,64,128,1,float16,float16,3,0.3408213456471761
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,4,64,0,1,float16,float16,3,0.340554674466451
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,4,64,128,1,float16,fp8,3,0.27458133300145465
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,4,64,0,1,float16,fp8,3,0.27501867214838666
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,4,64,128,1,float16,float16,7,0.3510133425394694
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,4,64,0,1,float16,float16,7,0.35091733932495117
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,4,64,128,1,float16,fp8,7,0.2853066722551982
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,4,64,0,1,float16,fp8,7,0.2853546738624573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,4,64,128,1,float16,float16,15,0.35879464944203693
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,4,64,0,1,float16,float16,15,0.35859731833140057
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,4,64,128,1,float16,fp8,15,0.29581334193547565
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,4,64,0,1,float16,float16,31,0.4329599936803182
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,4,64,0,1,float16,fp8,15,0.2974399924278259
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,4,64,128,1,float16,float16,31,0.4331786632537842
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,4,64,128,1,float16,fp8,31,0.3798133134841919
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,4,64,0,1,float16,fp8,31,0.37944531440734863
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,4,64,128,1,float16,float16,63,0.4366613229115804
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,4,64,0,1,float16,float16,63,0.4370186726252238
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,4,64,128,1,float16,fp8,63,0.3824106852213542
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,4,64,0,1,float16,fp8,63,0.38309868176778156
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,4,64,128,1,float16,float16,127,0.4408746560414632
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,4,64,0,1,float16,float16,127,0.44071467717488605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,4,64,128,1,float16,float16,255,0.44202133019765216
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,4,64,128,1,float16,fp8,127,0.3855839967727661
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,4,64,0,1,float16,fp8,127,0.38423999150594074
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,8,64,128,1,float16,float16,1,0.03482133398453394
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,4,64,0,1,float16,float16,255,0.5226133267084757
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,4,64,128,1,float16,fp8,255,0.3856906493504842
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,8,64,128,1,float16,float16,3,0.03333866596221924
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,8,64,0,1,float16,float16,1,0.03387733300526937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,4,64,0,1,float16,fp8,255,0.4635946750640869
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,8,64,128,1,float16,fp8,1,0.03329066683848699
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,8,64,0,1,float16,fp8,1,0.033333333830038704
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,8,64,0,1,float16,float16,3,0.0340693344672521
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,8,64,128,1,float16,fp8,3,0.03268799930810928
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,8,64,0,1,float16,fp8,3,0.03306133300065994
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,8,64,128,1,float16,float16,7,0.033626665671666466
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,8,64,0,1,float16,float16,7,0.03402133285999298
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,8,64,128,1,float16,fp8,7,0.03332266708215078
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,8,64,0,1,float16,fp8,7,0.03324799984693527
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,8,64,128,1,float16,float16,15,0.033589333295822144
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,8,64,0,1,float16,float16,15,0.03422400106986364
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,8,64,128,1,float16,fp8,15,0.03226666649182638
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,8,64,0,1,float16,fp8,15,0.032474666833877563
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,8,64,128,1,float16,float16,31,0.033344000577926636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,8,64,0,1,float16,float16,31,0.03375466664632162
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,8,64,128,1,float16,fp8,31,0.03325333446264267
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,8,64,0,1,float16,fp8,31,0.03347733368476232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,8,64,128,1,float16,float16,63,0.0337119996547699
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,8,64,0,1,float16,float16,63,0.03440533330043157
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,8,64,128,1,float16,fp8,63,0.032485333581765495
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,8,64,0,1,float16,fp8,63,0.0329120010137558
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,8,64,128,1,float16,float16,127,0.03408533334732056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,8,64,0,1,float16,float16,127,0.035189333061377205
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,8,64,128,1,float16,fp8,127,0.0335413341720899
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,8,64,0,1,float16,fp8,127,0.033413333197434746
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,8,64,128,1,float16,float16,255,0.033615998923778534
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,8,64,0,1,float16,float16,255,0.034485332667827606
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,8,64,128,1,float16,fp8,255,0.033610666791598
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,8,64,0,1,float16,fp8,255,0.03345066557327906
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,1,64,128,1,float16,float16,1,0.6722346941630045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,1,64,0,1,float16,float16,1,0.6727840105692545
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,1,64,128,1,float16,fp8,1,0.5363893508911133
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,1,64,0,1,float16,fp8,1,0.5354506572087606
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,1,64,128,1,float16,float16,3,0.6722400188446045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,1,64,0,1,float16,float16,3,0.6725920041402181
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,1,64,128,1,float16,fp8,3,0.5341653426488241
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,1,64,0,1,float16,fp8,3,0.5339306592941284
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,1,64,128,1,float16,float16,7,0.6933279832204183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,1,64,0,1,float16,float16,7,0.693615992863973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,1,64,128,1,float16,fp8,7,0.5616586605707804
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,1,64,0,1,float16,fp8,7,0.560912013053894
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,1,64,128,1,float16,float16,15,0.7099733352661133
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,1,64,0,1,float16,float16,15,0.7096213499704996
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,1,64,128,1,float16,fp8,15,0.5861440102259318
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,1,64,0,1,float16,fp8,15,0.5861973365147909
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,1,64,128,1,float16,float16,31,0.8590826988220215
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,1,64,0,1,float16,float16,31,0.8591146469116211
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,1,64,128,1,float16,fp8,31,0.7526559829711914
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,1,64,0,1,float16,fp8,31,0.7526346842447916
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,1,64,128,1,float16,float16,63,0.8667360146840414
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,1,64,0,1,float16,float16,63,0.8666400114695231
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,1,64,128,1,float16,fp8,63,0.7586133480072021
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,1,64,0,1,float16,fp8,63,0.7583306630452474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,1,64,128,1,float16,float16,127,0.8736639817555746
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,1,64,0,1,float16,float16,127,0.8737653096516927
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,1,64,128,1,float16,fp8,127,0.7631626923878988
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,2,64,128,1,float16,float16,1,0.6722933451334635
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,1,64,0,1,float16,fp8,127,0.7631359895070394
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,2,64,0,1,float16,float16,1,0.6721813678741455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,2,64,128,1,float16,fp8,1,0.5401386817296346
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,2,64,0,1,float16,fp8,1,0.5392853418986002
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,2,64,128,1,float16,float16,3,0.6725813547770182
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,2,64,0,1,float16,float16,3,0.672549327214559
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,2,64,128,1,float16,fp8,3,0.5402026573816935
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,2,64,0,1,float16,fp8,3,0.539466659228007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,2,64,128,1,float16,float16,7,0.6940213044484457
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,2,64,0,1,float16,float16,7,0.6933279832204183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,2,64,128,1,float16,fp8,7,0.5619466702143351
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,2,64,0,1,float16,fp8,7,0.5622506539026896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,2,64,128,1,float16,float16,15,0.7096746762593588
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,2,64,0,1,float16,float16,15,0.7106880346934
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,2,64,128,1,float16,fp8,15,0.5862933397293091
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,2,64,0,1,float16,fp8,15,0.5862026611963908
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,2,64,128,1,float16,float16,31,0.860490640004476
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,2,64,0,1,float16,float16,31,0.8597813447316488
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,2,64,128,1,float16,fp8,31,0.7538987000783285
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,2,64,0,1,float16,fp8,31,0.7526400089263916
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,2,64,128,1,float16,float16,63,0.8672853310902914
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,2,64,0,1,float16,float16,63,0.8673120339711508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,2,64,128,1,float16,fp8,63,0.7588053544362386
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,2,64,0,1,float16,fp8,63,0.7633492946624756
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,2,64,128,1,float16,float16,127,0.8755786418914795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,2,64,0,1,float16,float16,127,0.8749279975891113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,2,64,128,1,float16,fp8,127,0.763866662979126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,4,64,128,1,float16,float16,1,0.6746453444163004
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,4,64,0,1,float16,float16,1,0.6740427017211914
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,2,64,0,1,float16,fp8,127,0.7630773385365804
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,4,64,128,1,float16,fp8,1,0.5440853436787924
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,4,64,0,1,float16,fp8,1,0.5441866715749105
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,4,64,128,1,float16,float16,3,0.6745813687642416
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,4,64,0,1,float16,float16,3,0.6749707063039144
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,4,64,128,1,float16,fp8,3,0.5441439946492513
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,4,64,0,1,float16,fp8,3,0.5436160167058309
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,4,64,128,1,float16,float16,7,0.6947893301645914
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,4,64,0,1,float16,float16,7,0.6948266824086508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,4,64,128,1,float16,fp8,7,0.5622773170471191
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,4,64,0,1,float16,fp8,7,0.5626506805419922
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,4,64,128,1,float16,float16,15,0.7098773320515951
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,4,64,0,1,float16,float16,15,0.7095733483632406
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,4,64,128,1,float16,fp8,15,0.5865813493728638
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,4,64,0,1,float16,fp8,15,0.5864533185958862
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,4,64,128,1,float16,float16,31,0.8607359727223715
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,4,64,0,1,float16,float16,31,0.8603306611378988
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,4,64,128,1,float16,fp8,31,0.7540586789449056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,4,64,0,1,float16,fp8,31,0.7541013558705648
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,4,64,128,1,float16,float16,63,0.8676160176595052
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,4,64,0,1,float16,float16,63,0.8675839900970459
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,4,64,128,1,float16,fp8,63,0.7588106791178385
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,4,64,0,1,float16,fp8,63,0.7587947050730387
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,8,64,128,1,float16,float16,1,0.05898666878541311
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,4,64,128,1,float16,float16,127,0.8752480347951254
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,4,64,128,1,float16,fp8,127,0.7641653219858805
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,4,64,0,1,float16,float16,127,0.8757387002309164
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,8,64,0,1,float16,float16,1,0.05991466840108236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,8,64,128,1,float16,fp8,1,0.05585066477457682
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,4,64,0,1,float16,fp8,127,0.764357328414917
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,8,64,0,1,float16,fp8,1,0.05587733288606008
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,8,64,128,1,float16,float16,3,0.05849599838256836
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,8,64,0,1,float16,float16,3,0.05994133154551188
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,8,64,0,1,float16,float16,7,0.058277333776156105
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,8,64,128,1,float16,fp8,3,0.055888002117474876
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,8,64,0,1,float16,fp8,3,0.055914665261904396
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,8,64,128,1,float16,float16,7,0.058970664938290916
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,8,64,128,1,float16,fp8,7,0.05585066477457682
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,8,64,0,1,float16,fp8,7,0.05615466833114624
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,8,64,0,1,float16,fp8,15,0.055946667989095054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,8,64,128,1,float16,float16,15,0.05909866591294607
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,8,64,0,1,float16,float16,15,0.059802666306495667
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,8,64,128,1,float16,fp8,15,0.055871998270352684
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,8,64,128,1,float16,float16,31,0.059248000383377075
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,8,64,0,1,float16,float16,31,0.05819733440876007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,8,64,0,1,float16,float16,63,0.059877331058184304
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,8,64,128,1,float16,fp8,31,0.055946667989095054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,8,64,0,1,float16,fp8,31,0.05585066477457682
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,8,64,128,1,float16,float16,63,0.06000000238418579
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,8,64,128,1,float16,float16,127,0.06015466650327047
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,8,64,0,1,float16,float16,127,0.06168533364931742
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,8,64,128,1,float16,fp8,63,0.05585599939028422
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,8,64,0,1,float16,fp8,63,0.05599466462930044
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,float16,1,0.017173333714405697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,8,64,128,1,float16,fp8,127,0.056202664971351624
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,float16,1,0.017173333714405697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,8,64,0,1,float16,fp8,127,0.05599466462930044
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,fp8,1,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,fp8,1,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,float16,7,0.01724799970785777
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,float16,3,0.017290666699409485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,float16,3,0.01716800034046173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,fp8,3,0.014864000181357065
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,fp8,3,0.014783999572197596
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,float16,7,0.01828266680240631
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,fp8,7,0.014938666174809137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,fp8,7,0.015040000279744467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,float16,15,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,float16,15,0.01894933357834816
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,fp8,15,0.014943999548753103
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,fp8,15,0.014778666198253632
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,float16,31,0.019141333798567455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,float16,31,0.020037333170572918
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,fp8,31,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,fp8,31,0.017194667210181553
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,float16,63,0.019178666174411774
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,float16,63,0.019285333653291065
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,fp8,63,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,fp8,63,0.017093333105246227
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,float16,127,0.019002666076024372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,float16,127,0.019626667102177937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,fp8,127,0.017071999609470367
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,fp8,127,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,float16,255,0.019317333896954853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,float16,255,0.0230880007147789
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,fp8,255,0.01716800034046173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,fp8,255,0.021007999777793884
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,float16,511,0.019274666905403137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,float16,511,0.031898667414983116
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,fp8,511,0.017301333447297413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,fp8,511,0.02938666691382726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,float16,1023,0.019354666272799175
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,float16,1023,0.04966400067011515
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,fp8,1023,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,fp8,1023,0.047695999344189964
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,float16,2047,0.019653332730134327
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,float16,2047,0.08486933509508769
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,fp8,2047,0.017024000485738117
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,fp8,4095,0.15242666999499002
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,float16,8191,0.01937066639463107
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,fp8,2047,0.08256533245245616
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,float16,4095,0.01958400011062622
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,float16,8191,0.297818660736084
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,float16,4095,0.15465066830317178
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,fp8,4095,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,fp8,8191,0.017263999829689663
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,fp8,8191,0.29475732644399005
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,float16,1,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,float16,16383,0.6195040146509806
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,float16,16383,0.020714666694402695
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,64,128,1,float16,fp8,16383,0.017237332959969837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,float16,1,0.017231999586025875
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,fp8,1,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,fp8,1,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,float16,3,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,float16,3,0.0170666662355264
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,fp8,7,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,64,0,1,float16,fp8,16383,0.6214613517125448
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,fp8,3,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,fp8,3,0.014864000181357065
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,float16,7,0.017263999829689663
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,float16,7,0.017344000438849132
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,fp8,7,0.014783999572197596
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,float16,15,0.017093333105246227
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,float16,15,0.018559999763965607
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,fp8,15,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,fp8,15,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,float16,31,0.019333332777023315
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,float16,31,0.01907733331123988
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,fp8,31,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,fp8,31,0.017173333714405697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,float16,63,0.018976000448067982
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,float16,63,0.019061333189407986
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,fp8,63,0.017279999951521557
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,fp8,63,0.01724799970785777
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,float16,127,0.01932799940307935
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,float16,127,0.019381333142518997
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,fp8,127,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,fp8,127,0.01716800034046173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,float16,255,0.01916266605257988
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,float16,255,0.02316266546646754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,fp8,255,0.017269333203633625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,fp8,255,0.021002667645613354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,float16,1023,0.04981866478919983
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,float16,511,0.019402666638294857
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,float16,511,0.03150933235883713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,fp8,511,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,fp8,511,0.029338667790095013
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,float16,2047,0.08476799726486206
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,fp8,2047,0.01729600007335345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,float16,1023,0.01924266666173935
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,fp8,1023,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,fp8,1023,0.04755199948946635
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,float16,2047,0.01924266666173935
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,fp8,2047,0.08247999846935272
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,float16,4095,0.021040000021457672
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,float16,4095,0.15453867117563883
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,fp8,4095,0.017237332959969837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,fp8,4095,0.15240533153216043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,float16,8191,0.019253333409627277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,fp8,8191,0.017136000096797943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,float16,8191,0.29758399724960327
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,fp8,8191,0.2956533432006836
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,float16,16383,0.01918399954835574
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,64,128,1,float16,fp8,16383,0.017317333569129307
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,float16,1,0.01725333308180173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,float16,1,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,float16,16383,0.6196586688359579
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,fp8,1,0.014783999572197596
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,fp8,1,0.014949332922697067
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,64,0,1,float16,fp8,16383,0.6195306777954102
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,float16,3,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,float16,3,0.017269333203633625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,fp8,3,0.014885333677132925
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,fp8,3,0.014805333067973455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,float16,7,0.017535999417304993
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,float16,7,0.01870399961868922
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,fp8,7,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,fp8,7,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,float16,15,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,float16,15,0.018640000373125076
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,fp8,15,0.01482133318980535
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,float16,63,0.019178666174411774
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,fp8,15,0.014778666198253632
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,float16,31,0.01926400015751521
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,float16,31,0.01907733331123988
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,fp8,31,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,fp8,31,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,float16,63,0.01934933289885521
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,fp8,63,0.017317333569129307
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,fp8,63,0.01710933322707812
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,float16,127,0.019253333409627277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,float16,127,0.01930133377512296
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,fp8,127,0.017194667210181553
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,fp8,127,0.017301333447297413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,float16,255,0.019237333287795384
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,float16,255,0.02309866746266683
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,fp8,255,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,fp8,255,0.021018666525681812
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,float16,1023,0.049509331583976746
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,fp8,1023,0.017263999829689663
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,fp8,1023,0.04790399968624115
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,float16,2047,0.0199946661790212
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,float16,511,0.019296000401178997
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,float16,511,0.031504000226656594
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,fp8,511,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,fp8,511,0.02956799914439519
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,float16,1023,0.01912533367673556
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,float16,2047,0.08481066425641377
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,fp8,2047,0.01718933383623759
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,float16,8191,0.019120000302791595
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,fp8,2047,0.08273600041866302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,float16,4095,0.019365333020687103
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,float16,4095,0.15642666816711426
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,fp8,4095,0.017093333105246227
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,fp8,4095,0.15239999691645303
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,fp8,8191,0.017173333714405697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,float16,8191,0.29950932661692303
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,fp8,8191,0.2947733402252197
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,float16,16383,0.021104000508785248
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,64,128,1,float16,fp8,16383,0.017242666333913803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,float16,1,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,float16,1,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,float16,16383,0.6445653438568115
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,fp8,1,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,fp8,1,0.01089599976936976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,64,0,1,float16,fp8,16383,0.6225119829177856
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,float16,3,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,float16,3,0.01097600037852923
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,fp8,3,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,fp8,3,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,float16,7,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,float16,7,0.01098666712641716
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,fp8,7,0.010922666639089584
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,fp8,7,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,float16,15,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,float16,15,0.01098666712641716
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,fp8,15,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,fp8,15,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,float16,31,0.011055999745925268
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,float16,31,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,fp8,31,0.010992000500361124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,fp8,31,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,float16,63,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,float16,63,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,fp8,63,0.011071999867757162
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,fp8,63,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,float16,127,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,float16,127,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,fp8,127,0.010981333752473196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,fp8,127,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,float16,255,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,float16,255,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,fp8,255,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,fp8,255,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,float16,511,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,float16,511,0.011610666910807291
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,fp8,511,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,fp8,1023,0.012762666990359625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,fp8,511,0.012698666503032049
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,float16,1023,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,float16,1023,0.012773333738247553
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,fp8,1023,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,float16,2047,0.01293333371480306
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,float16,2047,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,fp8,2047,0.012730666746695837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,fp8,2047,0.016757333030303318
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,float16,8191,0.012858666479587555
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,float16,4095,0.012879999975363413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,float16,4095,0.01916266605257988
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,fp8,4095,0.012730666746695837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,fp8,4095,0.01930133377512296
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,float16,8191,0.027056001126766205
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,float16,16383,0.043893332282702126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,fp8,8191,0.0129120002190272
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,fp8,8191,0.02515200028816859
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,float16,16383,0.012752000242471695
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,64,128,1,float16,fp8,16383,0.01293333371480306
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,64,0,1,float16,fp8,16383,0.036288000643253326
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,1,64,128,1,float16,fp8,1,1.061018705368042
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,1,64,128,1,float16,float16,1,1.337429364522298
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,1,64,0,1,float16,fp8,1,1.0623199939727783
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,1,64,0,1,float16,float16,1,1.3361013730367024
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,1,64,128,1,float16,float16,3,1.3374560674031575
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,1,64,128,1,float16,fp8,3,1.06168532371521
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,1,64,0,1,float16,float16,3,1.337765375773112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,1,64,0,1,float16,fp8,3,1.0615253448486328
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,1,64,128,1,float16,float16,7,1.3798133532206218
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,1,64,128,1,float16,fp8,7,1.1165173053741455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,1,64,0,1,float16,float16,7,1.3789653778076172
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,1,64,0,1,float16,fp8,7,1.1163520018259685
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,1,64,128,1,float16,float16,15,1.4130454063415527
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,1,64,128,1,float16,fp8,15,1.1660693486531575
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,1,64,0,1,float16,float16,15,1.4143306414286296
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,1,64,0,1,float16,fp8,15,1.1654400030771892
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,1,64,128,1,float16,float16,31,1.7124160130818684
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,1,64,0,1,float16,float16,31,1.7134933471679688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,1,64,128,1,float16,fp8,31,1.4986666043599446
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,1,64,0,1,float16,fp8,31,1.499008019765218
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,1,64,128,1,float16,float16,63,1.7264374097188313
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,1,64,128,1,float16,fp8,63,1.5108426411946614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,1,64,0,1,float16,float16,63,1.7281440099080403
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,1,64,0,1,float16,fp8,63,1.511232058207194
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,2,64,128,1,float16,float16,1,1.3374613126118977
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,2,64,128,1,float16,fp8,1,1.0729066530863445
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,2,64,0,1,float16,float16,1,1.3387413024902344
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,2,64,0,1,float16,fp8,1,1.0725226402282715
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,2,64,128,1,float16,float16,3,1.3380853335062664
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,2,64,128,1,float16,fp8,3,1.0731039841969807
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,2,64,0,1,float16,float16,3,1.3391253153483074
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,2,64,0,1,float16,fp8,3,1.0737653573354085
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,2,64,128,1,float16,float16,7,1.3811306953430176
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,2,64,128,1,float16,fp8,7,1.1179893016815186
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,2,64,0,1,float16,float16,7,1.3794293403625488
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,2,64,0,1,float16,fp8,7,1.1174506346384685
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,2,64,128,1,float16,float16,15,1.412384033203125
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,2,64,128,1,float16,fp8,15,1.1653599739074707
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,2,64,0,1,float16,float16,15,1.4130560557047527
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,2,64,0,1,float16,fp8,15,1.1655466556549072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,2,64,128,1,float16,float16,31,1.7137866020202637
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,2,64,128,1,float16,fp8,31,1.4989333152770996
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,2,64,0,1,float16,float16,31,1.7126453717549641
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,2,64,0,1,float16,fp8,31,1.500501314798991
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,2,64,128,1,float16,float16,63,1.7281866073608398
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,2,64,128,1,float16,fp8,63,1.5111840565999348
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,2,64,0,1,float16,float16,63,1.7270453770955403
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,2,64,0,1,float16,fp8,63,1.5119199752807617
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,4,64,128,1,float16,float16,1,1.3429706891377766
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,4,64,0,1,float16,float16,1,1.3428799311319988
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,4,64,128,1,float16,fp8,1,1.084005355834961
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,4,64,0,1,float16,fp8,1,1.0828053156534831
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,4,64,128,1,float16,float16,3,1.3448425928751628
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,4,64,128,1,float16,fp8,3,1.0850826899210613
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,4,64,0,1,float16,float16,3,1.3446879386901855
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,4,64,0,1,float16,fp8,3,1.082703987757365
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,4,64,128,1,float16,float16,7,1.3829493522644043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,4,64,128,1,float16,fp8,7,1.1189440091451008
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,4,64,0,1,float16,float16,7,1.3814133008321126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,4,64,0,1,float16,fp8,7,1.1182560125986736
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,4,64,128,1,float16,float16,15,1.4127306938171387
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,4,64,0,1,float16,float16,15,1.412922700246175
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,4,64,128,1,float16,fp8,15,1.1671733061472576
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,4,64,0,1,float16,fp8,15,1.1672426859537761
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,4,64,128,1,float16,float16,31,1.7155680656433105
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,4,64,128,1,float16,fp8,31,1.5004639625549316
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,4,64,0,1,float16,float16,31,1.7157546679178874
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,4,64,0,1,float16,fp8,31,1.5004639625549316
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,4,64,128,1,float16,float16,63,1.7539626757303874
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,4,64,128,1,float16,fp8,63,1.511151949564616
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,4,64,0,1,float16,float16,63,1.7605973879496257
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,8,64,128,1,float16,float16,1,0.10896000266075134
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,8,64,0,1,float16,float16,1,0.10905599594116211
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,8,64,128,1,float16,fp8,1,0.10214400291442871
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,8,64,0,1,float16,fp8,1,0.10167466600735982
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,8,64,128,1,float16,float16,3,0.10948800047238667
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,8,64,0,1,float16,float16,3,0.1092800001303355
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,4,64,0,1,float16,fp8,63,1.5113706588745117
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,8,64,128,1,float16,fp8,3,0.10154133041699727
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,8,64,0,1,float16,fp8,3,0.10098666946093242
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,8,64,128,1,float16,float16,7,0.10946666200955708
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,8,64,0,1,float16,float16,7,0.10948266585667928
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,8,64,128,1,float16,fp8,7,0.10169066985448201
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,8,64,0,1,float16,fp8,7,0.10115733742713928
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,8,64,128,1,float16,float16,15,0.10904000202814738
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,8,64,0,1,float16,float16,15,0.1090666651725769
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,8,64,128,1,float16,fp8,15,0.10110933581988017
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,8,64,128,1,float16,float16,31,0.10973866780598958
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,8,64,0,1,float16,fp8,15,0.10299733281135559
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,8,64,0,1,float16,float16,31,0.10921600461006165
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,8,64,128,1,float16,fp8,31,0.10264000296592712
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,8,64,0,1,float16,fp8,31,0.10175999999046326
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,8,64,128,1,float16,float16,63,0.10938666264216106
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,8,64,0,1,float16,float16,63,0.10923733313878377
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,8,64,128,1,float16,fp8,63,0.10116799672444661
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,8,64,0,1,float16,fp8,63,0.10223999619483948
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,1,64,128,1,float16,fp8,1,2.121818701426188
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,1,64,128,1,float16,float16,1,2.668442726135254
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,1,64,0,1,float16,fp8,1,2.123664061228434
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,1,64,0,1,float16,float16,1,2.67137082417806
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,1,64,128,1,float16,fp8,3,2.1217919985453286
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,1,64,0,1,float16,fp8,3,2.1203786532084146
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,1,64,128,1,float16,float16,3,2.6758559544881186
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,1,64,0,1,float16,float16,3,2.6703945795694985
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,1,64,128,1,float16,fp8,7,2.233269373575846
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,1,64,128,1,float16,float16,7,2.770533243815104
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,1,64,0,1,float16,fp8,7,2.230197270711263
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,1,64,0,1,float16,float16,7,2.770618756612142
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,1,64,128,1,float16,float16,15,2.8367093404134116
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,1,64,128,1,float16,fp8,15,2.3321760495503745
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,1,64,0,1,float16,fp8,15,2.330165386199951
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,1,64,0,1,float16,float16,15,2.834335962931315
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,1,64,128,1,float16,fp8,31,2.9969278971354165
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,2,64,128,1,float16,fp8,1,2.14684263865153
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,2,64,0,1,float16,float16,1,2.6796000798543296
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,2,64,128,1,float16,float16,1,2.6857334772745767
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,1,64,128,1,float16,float16,31,3.4433921178181968
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,2,64,0,1,float16,fp8,1,2.1505866050720215
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,1,64,0,1,float16,float16,31,3.442602793375651
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,1,64,0,1,float16,fp8,31,3.001333236694336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,2,64,128,1,float16,fp8,3,2.1549866994222007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,2,64,0,1,float16,fp8,3,2.1578399340311685
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,2,64,128,1,float16,float16,3,2.680591901143392
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,2,64,0,1,float16,float16,3,2.680901209513346
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,2,64,128,1,float16,fp8,7,2.236176013946533
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,2,64,128,1,float16,float16,7,2.7910772959391275
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,2,64,0,1,float16,fp8,7,2.235525290171305
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,2,64,0,1,float16,float16,7,2.783146540323893
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,2,64,128,1,float16,fp8,15,2.3363253275553384
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,2,64,128,1,float16,float16,15,2.880037307739258
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,2,64,0,1,float16,fp8,15,2.336853345235189
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,2,64,0,1,float16,float16,15,2.8814614613850913
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,2,64,128,1,float16,float16,31,3.4866720835367837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,2,64,0,1,float16,fp8,31,3.0063254038492837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,2,64,128,1,float16,fp8,31,3.008917490641276
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,2,64,0,1,float16,float16,31,3.4941012064615884
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,4,64,128,1,float16,fp8,1,2.170373280843099
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,4,64,0,1,float16,fp8,1,2.176703929901123
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,4,64,128,1,float16,float16,1,2.695237477620443
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,4,64,0,1,float16,float16,1,2.7009706497192383
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,4,64,128,1,float16,float16,3,2.7115306854248047
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,4,64,128,1,float16,fp8,3,2.1771839459737143
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,4,64,0,1,float16,fp8,3,2.1823946634928384
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,4,64,0,1,float16,float16,3,2.7202720642089844
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,4,64,128,1,float16,fp8,7,2.2424000104268393
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,4,64,0,1,float16,fp8,7,2.248293399810791
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,4,64,128,1,float16,float16,7,2.837631861368815
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,4,64,0,1,float16,float16,7,2.845893224080404
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,4,64,128,1,float16,fp8,15,2.366154670715332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,4,64,128,1,float16,float16,15,3.000399907430013
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,4,64,0,1,float16,fp8,15,2.3718934059143066
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,4,64,0,1,float16,float16,15,3.001845359802246
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,8,64,128,1,float16,float16,1,0.20560532808303833
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,8,64,0,1,float16,float16,1,0.20574933290481567
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,8,64,128,1,float16,fp8,1,0.19326933224995932
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,8,64,0,1,float16,fp8,1,0.19518399238586426
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,4,64,128,1,float16,fp8,31,3.05406920115153
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,4,64,128,1,float16,float16,31,3.5620158513387046
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,4,64,0,1,float16,fp8,31,3.0563840866088867
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,4,64,0,1,float16,float16,31,3.561722755432129
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,8,64,128,1,float16,float16,3,0.20542933543523154
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,8,64,0,1,float16,float16,3,0.2055093248685201
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,8,64,128,1,float16,fp8,3,0.19506667057673135
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,8,64,0,1,float16,fp8,3,0.1952000061670939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,8,64,128,1,float16,float16,7,0.20553600788116455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,8,64,0,1,float16,float16,7,0.20536533991495767
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,8,64,128,1,float16,fp8,7,0.19503466288248697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,8,64,0,1,float16,fp8,7,0.19453332821528116
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,8,64,128,1,float16,float16,15,0.20544000466664633
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,8,64,0,1,float16,float16,15,0.20544532934824625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,8,64,128,1,float16,fp8,15,0.1946773330370585
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,float16,1,0.02716800073782603
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,float16,1,0.027162666122118633
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,8,64,0,1,float16,fp8,15,0.19509865840276083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,fp8,1,0.023071999351183575
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,8,64,128,1,float16,float16,31,0.2056480050086975
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,fp8,1,0.02309333284695943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,8,64,0,1,float16,float16,31,0.20599999030431113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,float16,3,0.027050666511058807
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,float16,3,0.0272533322374026
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,8,64,128,1,float16,fp8,31,0.19325333833694458
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,fp8,3,0.023408000667889912
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,8,64,0,1,float16,fp8,31,0.19404266277949014
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,fp8,3,0.023344000180562336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,float16,7,0.027535999814669292
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,float16,7,0.027552001178264618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,fp8,7,0.0234400009115537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,fp8,7,0.02347733328739802
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,float16,15,0.029152000943819683
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,float16,15,0.027434666951497395
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,fp8,15,0.024986666937669117
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,fp8,15,0.02515200028816859
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,float16,31,0.03323200096686681
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,float16,31,0.03339733431736628
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,fp8,31,0.02942399928967158
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,float16,127,0.033546666304270424
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,fp8,31,0.029301332930723827
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,fp8,127,0.029498666524887085
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,float16,63,0.033770665526390076
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,float16,255,0.033615998923778534
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,float16,63,0.033439998825391136
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,fp8,63,0.029445332785447437
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,fp8,63,0.029279999434947968
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,float16,127,0.033573334415753685
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,fp8,127,0.029520000020662945
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,float16,255,0.03773866593837738
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,fp8,511,0.052042668064435325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,fp8,255,0.029690665503342945
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,fp8,255,0.03530666728814443
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,float16,511,0.03324266771475474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,fp8,1023,0.08672533432642619
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,float16,511,0.0558240016301473
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,fp8,511,0.029232000311215717
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,float16,1023,0.03350399931271871
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,float16,1023,0.08916800220807393
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,fp8,2047,0.15440533558527628
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,fp8,1023,0.02959466725587845
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,float16,2047,0.033376000821590424
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,float16,2047,0.1586186687151591
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,fp8,2047,0.029605334003766377
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,float16,4095,0.03319466610749563
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,fp8,4095,0.02940266579389572
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,float16,4095,0.2974933385848999
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,float16,8191,0.03347733368476232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,fp8,4095,0.2916319966316223
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,64,128,1,float16,fp8,8191,0.029440000653266907
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,float16,1,0.02739733209212621
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,float16,8191,0.5759199857711792
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,float16,1,0.02743999908367793
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,fp8,1,0.023018665611743927
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,64,0,1,float16,fp8,8191,0.5702773332595825
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,fp8,1,0.023413332800070446
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,float16,3,0.027503999571005504
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,float16,3,0.027066667874654133
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,fp8,3,0.023365333676338196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,fp8,3,0.023381332556406658
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,float16,7,0.027482666075229645
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,float16,7,0.027317332724730175
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,fp8,7,0.023402666052182514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,fp8,7,0.02438933402299881
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,float16,15,0.027322667340437572
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,float16,15,0.0288426677385966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,fp8,15,0.02327999969323476
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,float16,63,0.03323733309904734
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,fp8,15,0.025087999800841015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,float16,31,0.0334346666932106
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,float16,31,0.03242666771014532
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,fp8,31,0.029477333029111225
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,fp8,31,0.029530666768550873
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,float16,63,0.03359466542800268
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,fp8,63,0.029472000896930695
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,fp8,63,0.02942933390537898
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,float16,127,0.03328000009059906
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,float16,127,0.033546666304270424
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,fp8,127,0.029493334392706554
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,fp8,127,0.02921066681543986
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,float16,255,0.033514666060606636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,float16,255,0.03877866764863332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,fp8,511,0.05202666421731313
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,fp8,255,0.02942933390537898
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,float16,1023,0.09060800075531006
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,fp8,255,0.0355679988861084
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,float16,511,0.033557333052158356
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,float16,2047,0.03364799916744232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,float16,511,0.055813332398732506
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,fp8,2047,0.029525332152843475
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,fp8,511,0.02958933264017105
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,float16,1023,0.03363733241955439
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,fp8,1023,0.029338667790095013
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,fp8,1023,0.08661866188049316
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,float16,2047,0.15850667158762613
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,fp8,2047,0.1548373301823934
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,float16,4095,0.033589333295822144
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,fp8,4095,0.02958400050799052
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,float16,4095,0.2974399924278259
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,float16,8191,0.0336053321758906
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,fp8,4095,0.2925013303756714
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,64,128,1,float16,fp8,8191,0.02943466603755951
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,float16,1,0.027248000105222065
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,float16,8191,0.5752426783243815
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,float16,1,0.027274665733178455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,fp8,1,0.02310933421055476
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,64,0,1,float16,fp8,8191,0.5694346825281779
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,fp8,1,0.023242667317390442
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,float16,3,0.02739199995994568
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,float16,3,0.02756800005833308
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,fp8,3,0.02327999969323476
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,fp8,3,0.02333866556485494
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,float16,7,0.027456000447273254
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,float16,7,0.027386667827765148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,fp8,7,0.023413332800070446
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,fp8,7,0.023434666295846302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,float16,15,0.02938133229811986
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,float16,15,0.02779199928045273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,fp8,15,0.02334933231274287
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,fp8,15,0.023402666052182514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,float16,31,0.03325333446264267
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,float16,31,0.03344533344109853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,fp8,31,0.029493334392706554
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,fp8,31,0.029520000020662945
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,float16,63,0.03327466547489166
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,float16,63,0.03339733431736628
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,fp8,63,0.02957333376010259
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,fp8,63,0.02958400050799052
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,float16,127,0.0335359995563825
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,float16,127,0.03324799984693527
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,fp8,127,0.0295413335164388
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,fp8,127,0.02937600016593933
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,float16,511,0.05578133463859558
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,float16,255,0.03331200033426285
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,float16,255,0.0376800000667572
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,fp8,255,0.029535998900731403
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,float16,1023,0.09059199690818787
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,fp8,255,0.03545066714286804
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,fp8,1023,0.08657067020734151
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,float16,511,0.03349866718053818
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,fp8,511,0.029685333371162415
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,float16,2047,0.1583093305428823
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,fp8,511,0.05209066470464071
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,float16,4095,0.033370666205883026
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,float16,1023,0.03321066747109095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,fp8,1023,0.02941333254178365
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,float16,2047,0.03350399931271871
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,float16,8191,0.033146666983763375
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,fp8,2047,0.02961066613594691
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,fp8,2047,0.15524267156918845
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,fp8,4095,0.029472000896930695
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,float16,8191,0.5820959806442261
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,float16,4095,0.2980266610781352
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,fp8,4095,0.2916746735572815
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,64,128,1,float16,fp8,8191,0.029466666281223297
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,float16,1,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,float16,1,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,fp8,1,0.011087999989589056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,64,0,1,float16,fp8,8191,0.5693653424580892
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,fp8,1,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,float16,3,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,float16,3,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,fp8,3,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,fp8,3,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,float16,7,0.011018666128317514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,float16,7,0.011157333850860596
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,fp8,7,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,fp8,7,0.011098666737476984
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,float16,15,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,float16,15,0.010928000013033548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,fp8,15,0.011018666128317514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,fp8,15,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,float16,31,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,float16,31,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,fp8,31,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,fp8,31,0.010954666882753372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,float16,63,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,float16,63,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,fp8,63,0.011077333241701126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,fp8,63,0.011461333682139715
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,float16,127,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,float16,127,0.011071999867757162
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,fp8,127,0.010954666882753372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,fp8,127,0.011114666859308878
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,float16,255,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,float16,255,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,fp8,255,0.010949333508809408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,fp8,255,0.01109333336353302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,float16,511,0.011071999867757162
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,fp8,1023,0.01097600037852923
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,float16,511,0.012655999511480331
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,fp8,1023,0.01310933381319046
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,fp8,511,0.011087999989589056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,float16,2047,0.01924266666173935
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,fp8,511,0.012762666990359625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,float16,1023,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,float16,1023,0.01312000056107839
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,float16,2047,0.012725333372751871
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,fp8,4095,0.01309866706530253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,fp8,2047,0.012826666235923767
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,fp8,2047,0.017488000293572743
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,float16,4095,0.01320533330241839
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,float16,4095,0.02532800038655599
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,fp8,4095,0.023503998915354412
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,float16,8191,0.012784000486135483
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,float16,8191,0.04223466912905375
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,64,128,1,float16,fp8,8191,0.013130666067202887
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,64,0,1,float16,fp8,8191,0.03559466699759165
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,fp8,1,0.03944533318281174
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,float16,1,0.04798933366934458
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,fp8,15,0.05205333232879639
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,fp8,3,0.041482667128245033
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,float16,3,0.048512001832326256
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,float16,31,0.05826666454474131
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,fp8,31,0.05190399785836538
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,fp8,63,0.05382933219273885
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,float16,63,0.05991466840108236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,float16,7,0.0496319979429245
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,fp8,7,0.04364799956480662
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,float16,127,0.0701333334048589
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,float16,15,0.0582826683918635
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,fp8,127,0.06406400104363759
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,float16,255,0.10338667035102844
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,fp8,255,0.09675199786822002
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,fp8,1023,0.29338665803273517
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,float16,511,0.1708959937095642
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,fp8,511,0.1628106633822123
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,float16,1023,0.30979732672373456
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,float16,2047,0.5827253262201945
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,float16,1,0.04775999983151754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,fp8,2047,0.5561279853185018
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,float16,7,0.04980800052483877
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,float16,3,0.049813335140546165
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,fp8,3,0.04138133426507314
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,fp8,7,0.043552001317342125
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,float16,4095,1.136794646581014
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,1,128,0,1,float16,fp8,4095,1.0794453620910645
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,float16,15,0.05983999868233999
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,fp8,15,0.05362133185068766
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,fp8,1,0.0397173340121905
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,float16,31,0.06002133091290792
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,fp8,31,0.05383466680844625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,float16,63,0.060047999024391174
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,fp8,63,0.05372266471385956
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,fp8,511,0.16268266240755716
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,float16,1023,0.3099199930826823
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,float16,127,0.07020266850789388
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,fp8,1023,0.29339732726414997
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,fp8,127,0.06438399851322174
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,float16,255,0.10315199693044026
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,fp8,255,0.09710933764775594
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,float16,511,0.17268800735473633
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,float16,2047,0.5839200019836426
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,fp8,2047,0.5561173359553019
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,float16,1,0.047695999344189964
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,fp8,1,0.03945599993069967
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,float16,3,0.04850133260091146
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,fp8,3,0.04155199974775314
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,float16,4095,1.148154656092326
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,float16,7,0.049695998430252075
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,2,128,0,1,float16,fp8,4095,1.0800960063934326
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,fp8,7,0.04353600243727366
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,float16,15,0.059978668888409935
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,fp8,15,0.053216000398000084
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,float16,31,0.05865600208441416
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,fp8,31,0.053957333167394005
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,float16,63,0.05985066791375478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,fp8,63,0.05369600156943003
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,float16,127,0.07011199990908305
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,fp8,127,0.06444799900054932
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,float16,255,0.1048479974269867
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,fp8,255,0.09705600142478943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,float16,511,0.17084799210230509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,fp8,511,0.16273599863052368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,float16,1023,0.30980799595514935
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,float16,1,0.012991999586423239
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,fp8,1023,0.29345067342122394
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,float16,2047,0.5911733309427897
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,fp8,2047,0.5576213200887045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,fp8,1,0.013199999928474426
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,float16,3,0.013167999684810638
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,float16,15,0.013077333569526672
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,fp8,3,0.012805332740147909
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,float16,7,0.012837332983811697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,float16,4095,1.1640266577402751
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,4,128,0,1,float16,fp8,4095,1.0935893058776855
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,fp8,7,0.013157332936922709
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,fp8,15,0.013072000195582708
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,float16,31,0.013397333522637686
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,fp8,31,0.013125333935022354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,float16,63,0.013877333452304205
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,fp8,63,0.012949333836634954
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,float16,127,0.013189333180586496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,fp8,127,0.012805332740147909
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,float16,255,0.013114667187134424
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,fp8,255,0.01302933320403099
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,float16,511,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,fp8,511,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,float16,1023,0.01933866615096728
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,fp8,1023,0.01716800034046173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,float16,1,0.008725333337982496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,float16,2047,0.039408000806967415
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,fp8,2047,0.02757866680622101
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,float16,4095,0.060778667529424034
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,128,8,128,0,1,float16,fp8,4095,0.04347200194994608
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,fp8,1,0.010442666709423065
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,float16,3,0.008821333448092142
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,fp8,15,0.009242666885256767
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,float16,31,0.008725333337982496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,fp8,3,0.010410666465759277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,float16,7,0.008714666590094566
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,fp8,7,0.010373333469033241
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,float16,15,0.008965333302815756
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,fp8,31,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,float16,63,0.009066666786869368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,fp8,63,0.013056000073750814
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,float16,127,0.009136000027259191
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,fp8,127,0.012784000486135483
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,float16,1023,0.029616000751654308
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,float16,255,0.021231998999913532
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,float16,2047,0.041850666205088295
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,fp8,255,0.012608000387748083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,fp8,4095,0.06030400097370148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,float16,511,0.0233599990606308
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,fp8,511,0.027248000105222065
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,fp8,1023,0.03158933420976003
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,fp8,2047,0.0415040006240209
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,float16,4095,0.06427733103434245
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,float16,8191,0.1111893355846405
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,fp8,8191,0.09915733337402344
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,float16,16383,0.20357867081960043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,float16,1,0.00877333308259646
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,1,128,0,1,float16,fp8,16383,0.1764906644821167
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,fp8,1,0.010197333370645842
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,float16,3,0.008613333106040955
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,fp8,3,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,float16,7,0.008709333216150602
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,fp8,7,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,float16,15,0.00874133345981439
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,fp8,15,0.010890666395425797
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,float16,31,0.008810666700204214
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,fp8,31,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,float16,63,0.00897066667675972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,fp8,63,0.012815999488035837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,float16,127,0.009375999992092451
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,fp8,127,0.012730666746695837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,float16,255,0.021002667645613354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,fp8,255,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,fp8,2047,0.04142399877309799
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,float16,511,0.02348266790310542
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,fp8,511,0.027082666754722595
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,float16,1023,0.02921066681543986
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,fp8,1023,0.03148799886306127
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,float16,2047,0.04181866844495138
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,float16,4095,0.06435733536879222
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,fp8,4095,0.06032533446947733
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,float16,8191,0.11106666922569275
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,fp8,8191,0.0992693305015564
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,float16,16383,0.2034346659978231
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,float16,1,0.008682666967312494
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,2,128,0,1,float16,fp8,16383,0.1768746574719747
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,fp8,1,0.010506667196750641
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,float16,3,0.008858666444818178
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,fp8,3,0.009818666925032934
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,float16,7,0.008858666444818178
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,fp8,7,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,float16,15,0.008810666700204214
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,fp8,15,0.009002666920423508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,float16,31,0.008922666932145754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,fp8,31,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,float16,63,0.008725333337982496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,fp8,63,0.012815999488035837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,float16,127,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,fp8,127,0.012842666357755661
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,float16,255,0.02103466788927714
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,fp8,255,0.012698666503032049
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,fp8,2047,0.04160533348719279
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,float16,511,0.023258666197458904
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,fp8,511,0.027221334477265675
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,float16,1023,0.02935466667016347
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,fp8,1023,0.031231999397277832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,float16,2047,0.04142933338880539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,float16,4095,0.06435200075308482
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,fp8,16383,0.17558934291203818
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,fp8,4095,0.06014933188756307
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,float16,8191,0.1111893355846405
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,fp8,8191,0.09893332918485005
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,float16,1,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,4,128,0,1,float16,float16,16383,0.20376533269882202
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,fp8,1,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,float16,3,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,fp8,3,0.01101333275437355
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,float16,7,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,fp8,7,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,float16,15,0.01062400018175443
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,fp8,15,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,float16,31,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,fp8,31,0.010885333021481832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,float16,63,0.010949333508809408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,fp8,63,0.010629333555698395
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,float16,127,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,fp8,127,0.010602666685978571
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,float16,255,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,fp8,255,0.011055999745925268
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,float16,2047,0.011018666128317514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,float16,511,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,fp8,511,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,float16,1023,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,fp8,1023,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,fp8,2047,0.011077333241701126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,float16,4095,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,fp8,4095,0.01670933390657107
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,float16,8191,0.017237332959969837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,fp8,8191,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,float16,16383,0.020975999534130096
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,128,8,128,0,1,float16,fp8,16383,0.018911999960740406
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,float16,1,0.012997332960367203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,fp8,1,0.009082666908701261
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,float16,3,0.012709333250919977
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,fp8,3,0.009066666786869368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,float16,7,0.012815999488035837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,fp8,7,0.009061333412925402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,float16,15,0.013050666699806849
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,fp8,15,0.00903466654320558
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,float16,31,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,fp8,31,0.011066666493813196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,float16,63,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,fp8,63,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,float16,127,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,fp8,127,0.012901333471139273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,float16,255,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,fp8,255,0.01303999995191892
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,float16,2047,0.053717335065205894
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,float16,511,0.02120000123977661
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,fp8,511,0.018906666586796444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,float16,1023,0.03323200096686681
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,fp8,1023,0.02921066681543986
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,fp8,2047,0.049653331438700356
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,float16,4095,0.09475732843081157
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,fp8,4095,0.0885599950949351
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,float16,8191,0.17675199111302695
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,fp8,8191,0.1665546695391337
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,float16,1,0.012837332983811697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,fp8,1,0.008752000207702318
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,float16,16383,0.34250132242838544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,float16,3,0.012906666845083237
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,1,128,0,1,float16,fp8,16383,0.32198933760325116
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,fp8,3,0.008762666955590248
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,float16,7,0.012981332838535309
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,fp8,7,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,float16,15,0.013264000415802002
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,fp8,15,0.009050666665037474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,float16,31,0.014975999792416891
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,fp8,31,0.010944000134865442
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,float16,63,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,fp8,63,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,float16,127,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,fp8,127,0.011018666128317514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,float16,255,0.01681600014368693
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,float16,2047,0.05376000205675761
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,fp8,255,0.013178666432698568
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,float16,511,0.021189334491888683
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,fp8,511,0.019130667050679524
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,float16,1023,0.0332640012105306
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,fp8,1023,0.029290666182835896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,fp8,2047,0.047983999053637184
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,float16,4095,0.09488000472386678
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,float16,16383,0.3409759998321533
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,fp8,4095,0.08870933453241985
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,float16,8191,0.1770133376121521
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,fp8,8191,0.16636799772580466
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,float16,1,0.012837332983811697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,fp8,1,0.008757333581646284
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,2,128,0,1,float16,fp8,16383,0.3219199975331624
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,float16,3,0.012869333227475485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,fp8,3,0.008976000050703684
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,float16,7,0.012773333738247553
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,fp8,7,0.009029333169261614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,float16,15,0.01314666618903478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,fp8,15,0.008976000050703684
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,float16,31,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,fp8,31,0.011114666859308878
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,float16,63,0.01481066644191742
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,fp8,63,0.011114666859308878
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,float16,127,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,fp8,127,0.012815999488035837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,float16,255,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,float16,2047,0.053957333167394005
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,fp8,2047,0.049600000182787575
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,fp8,255,0.013141332815090815
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,float16,511,0.021381333470344543
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,fp8,511,0.019258666783571243
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,float16,1023,0.03269333392381668
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,float16,8191,0.1767573356628418
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,fp8,1023,0.02940266579389572
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,float16,4095,0.09474133451779683
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,fp8,4095,0.08904533584912618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,fp8,8191,0.16647467017173767
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,float16,1,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,float16,16383,0.34276266892751056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,fp8,1,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,4,128,0,1,float16,fp8,16383,0.32425065835316974
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,float16,3,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,fp8,3,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,float16,7,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,fp8,31,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,fp8,7,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,float16,15,0.01097600037852923
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,fp8,15,0.011760000139474869
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,float16,31,0.010992000500361124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,float16,63,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,fp8,63,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,float16,127,0.010645333677530289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,fp8,127,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,float16,255,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,fp8,255,0.010656000425418219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,float16,511,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,fp8,511,0.011381333072980246
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,float16,1023,0.01137599969903628
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,fp8,1023,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,float16,8191,0.019215999792019527
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,float16,2047,0.014864000181357065
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,fp8,2047,0.014901333798964819
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,float16,4095,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,fp8,4095,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,fp8,8191,0.018906666586796444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,float16,16383,0.02325333406527837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,128,0,1,float16,float16,7,0.09117866555849712
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,128,8,128,0,1,float16,fp8,16383,0.021338666478792827
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,128,0,1,float16,float16,1,0.08798399567604065
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,128,0,1,float16,float16,15,0.11148266990979512
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,128,0,1,float16,fp8,15,0.09908800323804219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,128,0,1,float16,float16,3,0.09066667159398396
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,128,0,1,float16,fp8,1,0.07416533430417378
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,128,0,1,float16,fp8,3,0.07658666869004567
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,128,0,1,float16,fp8,7,0.08028799792130788
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,128,0,1,float16,float16,31,0.11315199732780457
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,128,0,1,float16,fp8,31,0.09916266798973083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,128,0,1,float16,float16,63,0.11331199606259663
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,128,0,1,float16,fp8,63,0.09986666838328044
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,128,0,1,float16,float16,127,0.1318933367729187
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,128,0,1,float16,fp8,127,0.12165866295496623
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,128,0,1,float16,float16,255,0.20127999782562256
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,128,0,1,float16,fp8,255,0.18688533703486124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,128,0,1,float16,float16,511,0.33477334181467694
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,128,0,1,float16,float16,1,0.08682666222254436
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,128,0,1,float16,fp8,511,0.3163306713104248
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,128,0,1,float16,float16,1023,0.6091200113296509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,128,0,1,float16,fp8,1023,0.5763253370920817
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,128,0,1,float16,float16,7,0.09285866220792134
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,128,0,1,float16,fp8,7,0.08061866462230682
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,128,0,1,float16,fp8,1,0.07457066575686137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,128,0,1,float16,float16,3,0.09100266297658284
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,128,0,1,float16,fp8,3,0.07658666869004567
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,128,0,1,float16,float16,2047,1.155232032140096
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,1,128,0,1,float16,fp8,2047,1.095743974049886
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,128,0,1,float16,float16,15,0.1132533351580302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,128,0,1,float16,fp8,15,0.09889599680900574
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,128,0,1,float16,float16,31,0.11310399572054546
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,128,0,1,float16,fp8,31,0.10060800115267436
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,128,0,1,float16,float16,63,0.11356799801190694
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,128,0,1,float16,fp8,63,0.10077333450317383
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,128,0,1,float16,fp8,511,0.3179200092951457
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,128,0,1,float16,float16,127,0.13365866740544638
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,128,0,1,float16,fp8,127,0.12166399757067363
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,128,0,1,float16,float16,255,0.20130133628845215
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,128,0,1,float16,fp8,255,0.18699200948079428
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,128,0,1,float16,float16,1,0.08691199620564778
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,128,0,1,float16,float16,511,0.3364373445510864
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,128,0,1,float16,float16,1023,0.6096426645914713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,128,0,1,float16,fp8,1023,0.5760639905929565
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,128,0,1,float16,fp8,1,0.07442133128643036
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,128,0,1,float16,float16,3,0.0906773308912913
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,128,0,1,float16,fp8,3,0.07656533519426982
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,128,0,1,float16,float16,2047,1.1664213339487712
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,128,0,1,float16,float16,7,0.09398399790128072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,2,128,0,1,float16,fp8,2047,1.095136006673177
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,128,0,1,float16,fp8,7,0.08040533463160197
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,128,0,1,float16,float16,15,0.1132319966952006
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,128,0,1,float16,fp8,15,0.0992746651172638
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,128,0,1,float16,float16,31,0.11322666207949321
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,128,0,1,float16,fp8,31,0.09954667091369629
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,128,0,1,float16,float16,63,0.113237331310908
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,128,0,1,float16,fp8,63,0.10083199540774028
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,128,0,1,float16,float16,127,0.13357333342234293
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,128,0,1,float16,fp8,127,0.12178666392962138
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,128,0,1,float16,float16,255,0.2011893391609192
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,128,0,1,float16,fp8,255,0.18714666366577148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,128,0,1,float16,float16,511,0.3364106814066569
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,128,0,1,float16,fp8,511,0.3165173331896464
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,128,0,1,float16,float16,1023,0.61954665184021
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,128,0,1,float16,fp8,1023,0.576314647992452
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,128,0,1,float16,float16,3,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,128,0,1,float16,float16,1,0.017317333569129307
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,128,0,1,float16,fp8,1,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,128,0,1,float16,float16,2047,1.1752586364746094
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,128,0,1,float16,fp8,3,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,4,128,0,1,float16,fp8,2047,1.1130666732788086
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,128,0,1,float16,float16,7,0.01728533332546552
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,128,0,1,float16,fp8,7,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,128,0,1,float16,float16,15,0.017258666455745697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,128,0,1,float16,fp8,15,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,128,0,1,float16,float16,31,0.017114666601022083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,128,0,1,float16,fp8,31,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,128,0,1,float16,float16,255,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,128,0,1,float16,float16,63,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,128,0,1,float16,fp8,63,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,128,0,1,float16,float16,127,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,128,0,1,float16,fp8,127,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,128,0,1,float16,fp8,1023,0.02741866558790207
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,128,0,1,float16,fp8,255,0.016837333639462788
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,float16,1,0.012858666479587555
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,128,0,1,float16,float16,511,0.021407999098300934
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,128,0,1,float16,fp8,511,0.02094399929046631
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,128,0,1,float16,float16,1023,0.03774400055408478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,128,0,1,float16,float16,2047,0.06181333462397257
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,128,8,128,0,1,float16,fp8,2047,0.045850664377212524
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,fp8,1,0.015002666662136713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,float16,3,0.012810666114091873
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,fp8,3,0.014789332946141561
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,float16,7,0.012858666479587555
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,fp8,7,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,float16,15,0.012975999464591345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,fp8,15,0.015397333850463232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,float16,31,0.013034666577974955
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,float16,63,0.013082666943470636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,fp8,31,0.019391999890406925
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,fp8,63,0.01926933353145917
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,float16,127,0.01525866612792015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,float16,1023,0.05205333232879639
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,fp8,127,0.01924266666173935
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,float16,255,0.019194666296243668
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,fp8,255,0.023237332701683044
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,float16,511,0.03148799886306127
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,fp8,511,0.03355200091997782
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,fp8,4095,0.1666719913482666
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,fp8,1023,0.05217599868774414
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,float16,2047,0.09468266367912292
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,fp8,2047,0.09071999788284302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,float16,4095,0.17890133460362753
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,float16,8191,0.3469333251317342
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,fp8,8191,0.31997867425282794
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,float16,1,0.012757333616415659
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,fp8,1,0.014938666174809137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,fp8,7,0.014922666052977243
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,float16,3,0.012885333349307379
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,float16,16383,0.6962773005167643
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,fp8,3,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,1,128,0,1,float16,fp8,16383,0.6236799955368042
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,float16,7,0.012794667234023413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,float16,15,0.013189333180586496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,fp8,15,0.01525866612792015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,float16,31,0.01313599944114685
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,fp8,31,0.019333332777023315
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,float16,63,0.013050666699806849
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,fp8,63,0.019274666905403137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,float16,127,0.014970666418472925
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,fp8,127,0.018992000569899876
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,float16,255,0.020981334149837494
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,fp8,255,0.023082666099071503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,float16,511,0.031258667508761086
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,fp8,511,0.0334346666932106
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,float16,1023,0.05202666421731313
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,fp8,1023,0.0518453319867452
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,float16,2047,0.09461333354314168
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,fp8,2047,0.09077866872151692
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,float16,4095,0.17878933747609457
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,fp8,4095,0.1668213407198588
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,float16,8191,0.34931198755900067
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,fp8,8191,0.3197653293609619
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,float16,1,0.012719999998807907
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,fp8,1,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,float16,3,0.012831999609867731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,float16,16383,0.7017652988433838
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,2,128,0,1,float16,fp8,16383,0.6258240143458048
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,fp8,3,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,float16,7,0.013023999830087027
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,fp8,7,0.014912000546852747
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,float16,15,0.012960000584522883
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,fp8,15,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,float16,31,0.013130666067202887
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,fp8,31,0.019253333409627277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,float16,63,0.01310933381319046
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,fp8,63,0.019333332777023315
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,float16,127,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,fp8,127,0.01929066702723503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,float16,255,0.02089600016673406
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,fp8,255,0.023071999351183575
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,fp8,2047,0.09073066711425781
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,float16,511,0.031231999397277832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,float16,4095,0.17908267180124918
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,fp8,511,0.03325333446264267
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,float16,1023,0.052000001072883606
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,fp8,1023,0.052015999952952065
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,float16,2047,0.09462933739026387
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,fp8,4095,0.16816532611846924
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,float16,8191,0.34911465644836426
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,fp8,8191,0.31992532809575397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,float16,1,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,fp8,1,0.01110400011142095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,float16,3,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,float16,16383,0.7145866552988688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,4,128,0,1,float16,fp8,16383,0.6279679934183756
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,fp8,3,0.010933333386977514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,float16,7,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,fp8,7,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,float16,15,0.01110400011142095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,fp8,15,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,float16,31,0.010645333677530289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,fp8,31,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,float16,63,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,fp8,63,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,float16,127,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,fp8,127,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,float16,255,0.010890666395425797
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,fp8,255,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,float16,511,0.01101333275437355
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,fp8,511,0.011125333607196808
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,float16,1023,0.011909333368142446
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,fp8,4095,0.017103999853134155
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,fp8,1023,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,float16,2047,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,fp8,2047,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,float16,4095,0.018944000204404194
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,float16,8191,0.021359999974568684
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,fp8,8191,0.019194666296243668
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,float16,16383,0.03858133405447006
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,128,8,128,0,1,float16,fp8,16383,0.027087998886903126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,128,0,1,float16,float16,1,0.16858667135238647
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,128,0,1,float16,fp8,1,0.14065066973368326
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,128,0,1,float16,float16,3,0.1763146718343099
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,128,0,1,float16,fp8,15,0.19132266441980997
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,128,0,1,float16,fp8,3,0.1460586686929067
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,128,0,1,float16,float16,7,0.1769226590792338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,128,0,1,float16,fp8,7,0.1541920006275177
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,128,0,1,float16,float16,15,0.21823465824127197
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,128,0,1,float16,float16,31,0.21983466545740762
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,128,0,1,float16,fp8,127,0.23747734228769937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,128,0,1,float16,fp8,31,0.19297067324320474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,128,0,1,float16,float16,63,0.22000000874201456
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,128,0,1,float16,fp8,63,0.19315199057261148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,128,0,1,float16,float16,127,0.2590133349100749
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,128,0,1,float16,float16,255,0.39441601435343426
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,128,0,1,float16,fp8,255,0.3652160167694092
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,128,0,1,float16,float16,511,0.6594719886779785
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,128,0,1,float16,fp8,511,0.6213119824727377
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,128,0,1,float16,float16,1,0.16854933897654215
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,128,0,1,float16,fp8,1,0.1418293317159017
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,128,0,1,float16,float16,3,0.1758613387743632
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,128,0,1,float16,fp8,3,0.1463093360265096
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,128,0,1,float16,float16,15,0.21976532538731894
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,128,0,1,float16,float16,1023,1.2019253571828206
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,1,128,0,1,float16,fp8,1023,1.1333226362864177
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,128,0,1,float16,fp8,31,0.19338667392730713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,128,0,1,float16,float16,7,0.1790293256441752
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,128,0,1,float16,fp8,7,0.15450666348139444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,128,0,1,float16,fp8,15,0.1914666692415873
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,128,0,1,float16,float16,31,0.21955732504526773
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,128,0,1,float16,float16,63,0.22006400426228842
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,128,0,1,float16,fp8,63,0.19339734315872192
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,128,0,1,float16,float16,127,0.25914132595062256
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,128,0,1,float16,fp8,127,0.23773332436879477
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,128,0,1,float16,float16,255,0.3943146864573161
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,128,0,1,float16,fp8,255,0.36511464913686115
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,128,0,1,float16,float16,511,0.660320003827413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,128,0,1,float16,fp8,511,0.6215146780014038
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,128,0,1,float16,float16,1,0.16850133736928305
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,128,0,1,float16,fp8,1,0.14216533303260803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,128,0,1,float16,float16,3,0.17561600605646768
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,128,0,1,float16,fp8,3,0.1462506651878357
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,128,0,1,float16,float16,1023,1.2183626492818196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,2,128,0,1,float16,fp8,1023,1.1326613426208496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,128,0,1,float16,float16,7,0.17914666732152304
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,128,0,1,float16,fp8,7,0.15405333042144775
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,128,0,1,float16,float16,15,0.21965867280960083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,128,0,1,float16,fp8,15,0.19315199057261148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,128,0,1,float16,float16,31,0.219760000705719
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,128,0,1,float16,fp8,31,0.19300800561904907
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,128,0,1,float16,float16,63,0.22004799048105875
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,128,0,1,float16,fp8,63,0.19347200791041055
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,128,0,1,float16,float16,127,0.2606346607208252
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,128,0,1,float16,fp8,127,0.23635200659434
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,128,0,1,float16,float16,255,0.3942879835764567
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,128,0,1,float16,fp8,255,0.3654826482137044
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,128,0,1,float16,float16,511,0.6778293450673422
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,128,0,1,float16,float16,3,0.024906667570273083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,128,0,1,float16,fp8,511,0.6214079856872559
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,128,0,1,float16,float16,1,0.025216000775496166
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,128,0,1,float16,fp8,1,0.023397333920001984
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,128,0,1,float16,fp8,3,0.0233599990606308
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,128,0,1,float16,float16,7,0.025333332518736523
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,128,0,1,float16,float16,1023,1.226080020268758
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,128,0,1,float16,fp8,7,0.023237332701683044
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,4,128,0,1,float16,fp8,1023,1.1592480341593425
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,128,0,1,float16,float16,15,0.02510933329661687
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,128,0,1,float16,fp8,15,0.02294933299223582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,128,0,1,float16,float16,31,0.025125332176685333
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,128,0,1,float16,fp8,31,0.023397333920001984
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,128,0,1,float16,float16,63,0.024773334463437397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,128,0,1,float16,fp8,63,0.023354666928450268
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,128,0,1,float16,float16,511,0.03922666609287262
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,128,0,1,float16,float16,127,0.02513599892457326
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,128,0,1,float16,fp8,127,0.02333866556485494
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,128,0,1,float16,float16,255,0.02518400053183238
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,128,0,1,float16,fp8,255,0.023402666052182514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,128,0,1,float16,fp8,511,0.03126399964094162
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,128,0,1,float16,float16,1023,0.057850668827692665
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,128,8,128,0,1,float16,fp8,1023,0.04334933559099833
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,1,128,0,1,float16,float16,1,0.3305973410606384
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,1,128,0,1,float16,fp8,1,0.2752586603164673
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,1,128,0,1,float16,float16,3,0.3466613292694092
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,1,128,0,1,float16,fp8,3,0.2873973250389099
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,1,128,0,1,float16,float16,7,0.3497653404871623
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,1,128,0,1,float16,float16,15,0.43160001436869305
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,1,128,0,1,float16,fp8,7,0.30161066850026447
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,1,128,0,1,float16,fp8,15,0.3776906728744507
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,1,128,0,1,float16,float16,31,0.4326666593551636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,1,128,0,1,float16,fp8,31,0.3791360060373942
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,1,128,0,1,float16,float16,63,0.4352000157038371
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,1,128,0,1,float16,fp8,63,0.38145601749420166
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,1,128,0,1,float16,float16,127,0.5124106804529825
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,1,128,0,1,float16,fp8,127,0.46583465735117596
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,2,128,0,1,float16,float16,1,0.3306079904238383
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,2,128,0,1,float16,fp8,1,0.2752959926923116
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,2,128,0,1,float16,float16,3,0.34673066933949787
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,1,128,0,1,float16,float16,255,0.7830506960550944
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,1,128,0,1,float16,fp8,255,0.7215733528137207
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,2,128,0,1,float16,fp8,3,0.28724799553553265
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,2,128,0,1,float16,float16,7,0.3537760178248088
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,2,128,0,1,float16,fp8,7,0.30180267492930096
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,2,128,0,1,float16,float16,15,0.4325653314590454
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,2,128,0,1,float16,fp8,15,0.3777386744817098
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,2,128,0,1,float16,float16,31,0.43298133214314777
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,2,128,0,1,float16,fp8,31,0.37928001085917157
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,2,128,0,1,float16,float16,127,0.514410654703776
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,2,128,0,1,float16,float16,63,0.4352426528930664
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,2,128,0,1,float16,fp8,63,0.3798826535542806
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,2,128,0,1,float16,fp8,127,0.46743468443552655
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,4,128,0,1,float16,float16,1,0.33058667182922363
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,2,128,0,1,float16,float16,255,0.7825653553009033
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,4,128,0,1,float16,fp8,1,0.27713600794474286
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,2,128,0,1,float16,fp8,255,0.7233440081278483
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,4,128,0,1,float16,fp8,7,0.3020426630973816
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,4,128,0,1,float16,float16,3,0.34670400619506836
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,4,128,0,1,float16,fp8,3,0.28752533594767254
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,4,128,0,1,float16,float16,7,0.35312533378601074
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,4,128,0,1,float16,float16,15,0.4325173298517863
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,4,128,0,1,float16,fp8,15,0.3776906728744507
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,4,128,0,1,float16,float16,31,0.43249066670735675
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,4,128,0,1,float16,fp8,31,0.379962682723999
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,4,128,0,1,float16,float16,63,0.4362879991531372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,4,128,0,1,float16,fp8,63,0.381440003712972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,4,128,0,1,float16,float16,127,0.5144799947738647
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,4,128,0,1,float16,fp8,127,0.4677813450495402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,8,128,0,1,float16,float16,1,0.039477333426475525
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,8,128,0,1,float16,fp8,1,0.03752533346414566
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,4,128,0,1,float16,float16,255,0.8086026509602865
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,8,128,0,1,float16,fp8,7,0.03749866783618927
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,8,128,0,1,float16,float16,3,0.040005333721637726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,4,128,0,1,float16,fp8,255,0.7232639789581299
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,8,128,0,1,float16,fp8,3,0.037658666570981346
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,8,128,0,1,float16,float16,7,0.039733332892258964
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,8,128,0,1,float16,float16,15,0.039493332306543984
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,8,128,0,1,float16,fp8,15,0.03735466549793879
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,8,128,0,1,float16,float16,31,0.03994666785001755
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,8,128,0,1,float16,fp8,31,0.037530665596326195
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,8,128,0,1,float16,float16,63,0.04068800061941147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,8,128,0,1,float16,fp8,63,0.037248000502586365
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,8,128,0,1,float16,float16,127,0.04141333450873693
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,8,128,0,1,float16,fp8,127,0.037658666570981346
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,8,128,0,1,float16,float16,255,0.04345066845417023
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,128,8,128,0,1,float16,fp8,255,0.037717332442601524
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,1,128,0,1,float16,float16,1,0.6548906564712524
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,1,128,0,1,float16,fp8,1,0.543445348739624
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,1,128,0,1,float16,fp8,3,0.5661973158518473
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,1,128,0,1,float16,float16,3,0.6877386569976807
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,1,128,0,1,float16,float16,7,0.6945599714914957
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,1,128,0,1,float16,fp8,7,0.5963840087254842
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,1,128,0,1,float16,float16,15,0.8588319619496664
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,1,128,0,1,float16,fp8,15,0.7492853005727133
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,1,128,0,1,float16,float16,31,0.8591519991556803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,1,128,0,1,float16,fp8,31,0.7519146601359049
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,1,128,0,1,float16,float16,63,0.8661653200785319
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,1,128,0,1,float16,fp8,63,0.7549013296763102
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,1,128,0,1,float16,float16,127,1.0188693205515544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,1,128,0,1,float16,fp8,127,0.9269546667734782
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,2,128,0,1,float16,float16,1,0.6537546714146932
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,2,128,0,1,float16,fp8,1,0.5436533292134603
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,2,128,0,1,float16,float16,3,0.6886719862620035
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,2,128,0,1,float16,fp8,3,0.5680906772613525
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,2,128,0,1,float16,float16,7,0.703333298365275
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,2,128,0,1,float16,fp8,7,0.5985120137532552
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,2,128,0,1,float16,float16,15,0.859386682510376
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,2,128,0,1,float16,fp8,15,0.7491679986317953
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,2,128,0,1,float16,float16,31,0.8604586919148763
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,2,128,0,1,float16,fp8,31,0.7518293062845866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,2,128,0,1,float16,float16,63,0.8655573527018229
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,2,128,0,1,float16,fp8,63,0.7559839884440104
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,2,128,0,1,float16,float16,127,1.0289119879404705
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,4,128,0,1,float16,float16,1,0.6523040135701498
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,2,128,0,1,float16,fp8,127,0.9297599792480469
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,4,128,0,1,float16,fp8,1,0.5472960074742635
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,4,128,0,1,float16,float16,3,0.6881759961446127
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,4,128,0,1,float16,fp8,3,0.5684213240941366
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,4,128,0,1,float16,float16,7,0.703333298365275
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,4,128,0,1,float16,fp8,7,0.5988639990488688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,4,128,0,1,float16,fp8,15,0.7502613067626953
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,4,128,0,1,float16,float16,15,0.8590186436971029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,4,128,0,1,float16,float16,31,0.8608266512552897
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,4,128,0,1,float16,fp8,31,0.7526826858520508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,4,128,0,1,float16,float16,63,0.8715466658274332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,4,128,0,1,float16,fp8,63,0.7563146750132242
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,8,128,0,1,float16,float16,1,0.07042133311430614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,8,128,0,1,float16,fp8,1,0.06507200002670288
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,8,128,0,1,float16,float16,3,0.07115200161933899
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,4,128,0,1,float16,float16,127,1.0671199957529705
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,8,128,0,1,float16,fp8,3,0.06605333089828491
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,4,128,0,1,float16,fp8,127,0.9331733385721842
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,8,128,0,1,float16,float16,7,0.0707946668068568
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,8,128,0,1,float16,fp8,7,0.0643039991458257
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,8,128,0,1,float16,float16,15,0.07126933336257935
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,8,128,0,1,float16,fp8,15,0.06497600177923839
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,8,128,0,1,float16,float16,31,0.07108800113201141
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,8,128,0,1,float16,fp8,31,0.06532266736030579
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,8,128,0,1,float16,float16,63,0.07055466870466869
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,8,128,0,1,float16,fp8,63,0.06403199831644694
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,float16,1,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,8,128,0,1,float16,float16,127,0.07167466481526692
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,fp8,1,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,128,8,128,0,1,float16,fp8,127,0.06618133187294006
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,float16,3,0.017194667210181553
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,fp8,3,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,float16,7,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,fp8,7,0.01492799942692121
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,float16,15,0.018960000326236088
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,fp8,15,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,float16,31,0.019178666174411774
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,fp8,31,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,fp8,255,0.029338667790095013
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,float16,63,0.018944000204404194
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,fp8,63,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,float16,1023,0.08449066678682964
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,float16,127,0.02128000060717265
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,fp8,127,0.019381333142518997
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,float16,255,0.03150933235883713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,float16,511,0.04864533245563507
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,fp8,511,0.045909335215886436
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,fp8,1023,0.08137066662311554
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,float16,2047,0.15447466572125754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,fp8,2047,0.15040533741315207
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,float16,4095,0.29759466648101807
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,fp8,4095,0.28963200251261395
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,float16,8191,0.5820800065994263
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,fp8,8191,0.5675253470738729
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,float16,1,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,fp8,1,0.015930666277805965
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,float16,3,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,fp8,3,0.014954666296641031
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,float16,7,0.01714666684468587
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,fp8,7,0.01481066644191742
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,float16,15,0.019237333287795384
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,float16,16383,1.2201386292775471
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,fp8,15,0.017082666357358296
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,1,128,0,1,float16,fp8,16383,1.3702932993570964
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,float16,31,0.019002666076024372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,fp8,31,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,float16,63,0.01931200052301089
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,fp8,63,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,fp8,511,0.046384001771608986
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,float16,127,0.021210665504137676
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,fp8,127,0.01931200052301089
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,float16,255,0.03151999910672506
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,fp8,255,0.029178666571776073
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,float16,511,0.04927466809749603
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,float16,1023,0.08454400300979614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,fp8,1023,0.08152000109354655
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,float16,2047,0.15445866187413534
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,fp8,2047,0.15013866623242697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,float16,4095,0.2982293367385864
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,fp8,4095,0.28860267003377277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,float16,8191,0.5846133232116699
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,float16,1,0.017082666357358296
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,fp8,8191,0.5702613194783529
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,fp8,1,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,float16,3,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,fp8,3,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,float16,7,0.017194667210181553
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,float16,16383,1.2916746934254963
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,fp8,7,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,float16,15,0.019215999792019527
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,2,128,0,1,float16,fp8,16383,1.3705387115478516
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,fp8,15,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,float16,31,0.019029332945744198
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,fp8,31,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,float16,63,0.01926400015751521
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,fp8,63,0.01725333308180173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,float16,127,0.021295999487241108
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,fp8,511,0.0466186652580897
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,fp8,127,0.02035733312368393
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,float16,255,0.03126399964094162
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,fp8,255,0.02935466667016347
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,float16,511,0.04786666731039683
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,float16,1023,0.08457600076993306
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,fp8,1023,0.08102400104204814
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,float16,2047,0.15440533558527628
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,fp8,2047,0.15029866496721903
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,float16,4095,0.2976800004641215
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,fp8,4095,0.2895680069923401
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,float16,8191,0.5865813493728638
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,float16,1,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,float16,3,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,fp8,8191,0.5683306852976481
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,fp8,1,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,fp8,3,0.010656000425418219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,float16,15,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,float16,7,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,float16,31,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,fp8,7,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,float16,16383,1.4188319842020671
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,fp8,15,0.010981333752473196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,4,128,0,1,float16,fp8,16383,1.4970347086588542
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,fp8,31,0.011136000355084738
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,float16,63,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,fp8,63,0.0106133334338665
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,float16,127,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,fp8,127,0.011077333241701126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,float16,255,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,fp8,255,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,float16,511,0.012784000486135483
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,float16,4095,0.021242665747801464
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,fp8,511,0.011509332805871964
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,float16,1023,0.012906666845083237
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,fp8,1023,0.012991999586423239
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,float16,2047,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,fp8,2047,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,fp8,4095,0.019893333315849304
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,float16,8191,0.03935466706752777
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,fp8,8191,0.027061333258946735
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,float16,16383,0.05968533456325531
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,128,8,128,0,1,float16,fp8,16383,0.04348800083001455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,1,128,0,1,float16,float16,1,1.305066665013631
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,1,128,0,1,float16,fp8,1,1.081664005915324
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,1,128,0,1,float16,float16,3,1.371664047241211
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,1,128,0,1,float16,fp8,3,1.1293333371480305
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,1,128,0,1,float16,fp8,7,1.1885226567586262
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,1,128,0,1,float16,float16,7,1.3902880350748699
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,1,128,0,1,float16,fp8,15,1.494368076324463
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,1,128,0,1,float16,float16,15,1.7153387069702148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,1,128,0,1,float16,fp8,31,1.501050631205241
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,1,128,0,1,float16,float16,31,1.7202612559000652
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,1,128,0,1,float16,fp8,63,1.5110079447428386
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,2,128,0,1,float16,fp8,1,1.084122657775879
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,2,128,0,1,float16,float16,1,1.3023413022359211
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,1,128,0,1,float16,float16,63,1.7459519704182942
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,2,128,0,1,float16,fp8,3,1.136133352915446
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,2,128,0,1,float16,float16,3,1.3755733172098796
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,2,128,0,1,float16,fp8,7,1.1970986525217693
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,2,128,0,1,float16,float16,7,1.404037316640218
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,2,128,0,1,float16,fp8,15,1.497114658355713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,2,128,0,1,float16,float16,15,1.720293362935384
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,2,128,0,1,float16,float16,31,1.7337759335835774
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,2,128,0,1,float16,fp8,31,1.511525313059489
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,2,128,0,1,float16,float16,63,1.7720905939737956
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,2,128,0,1,float16,fp8,63,1.5336267153422039
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,4,128,0,1,float16,float16,1,1.3118560314178467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,4,128,0,1,float16,fp8,1,1.091370662053426
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,4,128,0,1,float16,fp8,3,1.1423892974853516
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,4,128,0,1,float16,float16,3,1.3787733713785808
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,4,128,0,1,float16,fp8,7,1.1982933680216472
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,4,128,0,1,float16,float16,7,1.4219199816385906
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,4,128,0,1,float16,fp8,15,1.5053067207336426
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,4,128,0,1,float16,float16,15,1.7404319445292156
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,8,128,0,1,float16,float16,1,0.1279253363609314
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,8,128,0,1,float16,fp8,1,0.11960533261299133
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,4,128,0,1,float16,fp8,31,1.5313706398010254
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,4,128,0,1,float16,float16,31,1.7645759582519531
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,8,128,0,1,float16,float16,3,0.12847999731699625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,8,128,0,1,float16,fp8,3,0.11937066912651062
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,4,128,0,1,float16,fp8,63,1.5662506421407063
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,4,128,0,1,float16,float16,63,1.7815093994140625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,8,128,0,1,float16,float16,7,0.12821867068608603
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,8,128,0,1,float16,fp8,7,0.11967466274897258
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,8,128,0,1,float16,float16,15,0.12778666615486145
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,8,128,0,1,float16,fp8,15,0.11960533261299133
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,8,128,0,1,float16,float16,31,0.12777599692344666
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,8,128,0,1,float16,fp8,31,0.11929066975911458
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,8,128,0,1,float16,float16,63,0.12759466965993246
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,128,8,128,0,1,float16,fp8,63,0.12013866504033406
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,1,128,0,1,float16,fp8,1,2.3121439615885415
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,1,128,0,1,float16,fp8,3,2.364352067311605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,1,128,0,1,float16,float16,1,2.794047991434733
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,1,128,0,1,float16,float16,3,2.8932854334513345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,1,128,0,1,float16,fp8,7,2.4667840003967285
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,1,128,0,1,float16,float16,7,2.9313599268595376
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,1,128,0,1,float16,float16,15,3.5020745595296225
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,1,128,0,1,float16,fp8,15,3.0500106811523438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,2,128,0,1,float16,fp8,1,2.3462559382120767
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,2,128,0,1,float16,float16,1,2.8712266286214194
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,1,128,0,1,float16,fp8,31,3.062704086303711
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,1,128,0,1,float16,float16,31,3.5006561279296875
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,2,128,0,1,float16,fp8,3,2.386202653249105
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,2,128,0,1,float16,float16,7,2.980794588724772
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,2,128,0,1,float16,fp8,15,3.075599988301595
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,2,128,0,1,float16,float16,15,3.5128908157348633
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,2,128,0,1,float16,fp8,31,3.0856641133626304
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,4,128,0,1,float16,float16,1,2.8992268244425454
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,2,128,0,1,float16,float16,31,3.5167039235432944
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,4,128,0,1,float16,fp8,1,2.3836800257364907
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,2,128,0,1,float16,float16,3,2.9347947438557944
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,4,128,0,1,float16,fp8,3,2.4219253857930503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,4,128,0,1,float16,float16,3,2.9566933314005532
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,4,128,0,1,float16,fp8,7,2.5208373069763184
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,4,128,0,1,float16,float16,7,3.0045013427734375
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,4,128,0,1,float16,float16,15,3.5307092666625977
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,8,128,0,1,float16,float16,1,0.2425546646118164
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,8,128,0,1,float16,fp8,1,0.22709866364796957
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,4,128,0,1,float16,fp8,15,3.087600072224935
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,8,128,0,1,float16,float16,3,0.24215465784072876
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,4,128,0,1,float16,float16,31,3.536224047342936
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,8,128,0,1,float16,fp8,3,0.22749867041905722
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,2,128,0,1,float16,fp8,7,2.493120034535726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,float16,1,0.027029333015282948
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,8,128,0,1,float16,float16,7,0.24241065979003906
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,4,128,0,1,float16,fp8,31,3.099760055541992
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,8,128,0,1,float16,fp8,7,0.22715200980504355
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,fp8,1,0.023370665808518726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,8,128,0,1,float16,float16,15,0.24246933062871298
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,float16,3,0.02740799884001414
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,8,128,0,1,float16,fp8,15,0.22645866870880127
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,fp8,3,0.02346666653951009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,float16,7,0.027461332579453785
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,8,128,0,1,float16,float16,31,0.24187199274698892
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,fp8,7,0.02345066765944163
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,float16,15,0.031541332602500916
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,128,8,128,0,1,float16,fp8,31,0.22738132874170938
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,fp8,15,0.02923733244339625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,float16,31,0.03165333221356074
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,fp8,31,0.02941333254178365
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,float16,63,0.031983998914559685
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,fp8,63,0.02951466788848241
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,float16,127,0.03766400118668874
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,fp8,127,0.0352960005402565
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,float16,255,0.05585599939028422
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,fp8,255,0.052111998200416565
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,float16,511,0.08896000186602275
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,fp8,511,0.08543999989827473
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,float16,1023,0.15858667095502219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,fp8,1023,0.15243732929229736
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,float16,2047,0.2977706591288249
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,fp8,2047,0.28545065720876056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,float16,4095,0.5784159898757935
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,fp8,4095,0.5534133513768514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,float16,1,0.027189334233601887
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,fp8,1,0.023056000471115112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,float16,3,0.027215999861558277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,fp8,3,0.023408000667889912
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,float16,8191,1.1361066500345867
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,float16,7,0.02740799884001414
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,fp8,7,0.025237334271272022
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,1,128,0,1,float16,fp8,8191,1.0909600257873535
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,float16,15,0.03169066707293192
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,fp8,15,0.029301332930723827
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,float16,31,0.031301334500312805
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,float16,255,0.05571199953556061
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,fp8,31,0.029440000653266907
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,float16,63,0.03151999910672506
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,fp8,63,0.0295413335164388
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,float16,127,0.037418665985266365
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,fp8,127,0.03531199942032496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,fp8,255,0.052005335688591
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,float16,511,0.08898666501045227
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,fp8,2047,0.28548266490300495
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,fp8,511,0.08557867010434468
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,float16,1023,0.15852800011634827
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,fp8,1023,0.15244266390800476
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,float16,2047,0.2977493405342102
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,float16,4095,0.578442653020223
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,float16,1,0.0271573339899381
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,fp8,4095,0.5534186760584513
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,float16,8191,1.1524906953175862
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,fp8,1,0.02330133318901062
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,float16,3,0.027093333502610523
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,fp8,15,0.029301332930723827
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,fp8,3,0.023381332556406658
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,float16,7,0.027445333699385326
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,2,128,0,1,float16,fp8,8191,1.08953062693278
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,fp8,7,0.02499733368555705
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,float16,15,0.03139200061559677
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,float16,31,0.03150933235883713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,float16,255,0.05551466842492422
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,fp8,31,0.02916266769170761
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,fp8,255,0.05195199946562449
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,float16,63,0.0315733328461647
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,fp8,511,0.08478400111198425
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,fp8,63,0.02945599953333537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,float16,127,0.03745066622893015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,fp8,127,0.035242666800816856
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,float16,511,0.08868267138799031
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,float16,1023,0.15842666228612265
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,fp8,1023,0.15227199594179788
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,float16,2047,0.29781333605448407
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,fp8,2047,0.2884693344434102
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,float16,4095,0.5828213294347128
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,fp8,4095,0.5537173350652059
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,float16,3,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,float16,1,0.010965333630641302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,fp8,3,0.011136000355084738
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,fp8,1,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,float16,7,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,float16,8191,1.1675199667612712
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,fp8,7,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,4,128,0,1,float16,fp8,8191,1.0972959995269775
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,float16,15,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,fp8,15,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,float16,127,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,float16,31,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,fp8,31,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,float16,63,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,fp8,63,0.011077333241701126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,fp8,127,0.01101333275437355
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,float16,255,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,fp8,1023,0.012890666723251343
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,fp8,255,0.010970667004585266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,float16,511,0.012789333860079447
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,fp8,511,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,float16,1023,0.014912000546852747
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,float16,2047,0.02093333254257838
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,fp8,2047,0.019296000401178997
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,float16,4095,0.0377813329299291
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,fp8,4095,0.026895999908447266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,float16,8191,0.05909333129723867
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,float16,3,0.03753600021203359
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,128,8,128,0,1,float16,fp8,8191,0.04191466669241587
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,float16,1,0.037503999968369804
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,float16,1,0.03730666637420654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,float16,7,0.03746666759252548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,fp8,1,0.03142400085926056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,fp8,7,0.033344000577926636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,fp8,1,0.03178133318821589
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,float16,3,0.03748800108830134
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,fp8,3,0.03146133323510488
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,fp8,3,0.031685332457224526
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,float16,7,0.037392000357309975
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,float16,31,0.045850664377212524
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,fp8,7,0.033285332222779594
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,float16,15,0.0396373321612676
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,float16,15,0.03957333415746689
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,fp8,15,0.03367999941110611
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,fp8,15,0.03372266640265783
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,float16,31,0.04563733438650767
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,fp8,31,0.04168533285458883
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,fp8,31,0.04178666571776072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,float16,63,0.04593066871166229
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,fp8,127,0.04159999887148539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,float16,63,0.04588800172011057
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,fp8,63,0.04155199974775314
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,fp8,63,0.04178133110205332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,float16,127,0.047744000951449074
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,float16,127,0.04598399996757507
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,fp8,127,0.04181866844495138
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,float16,511,0.08080533146858215
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,float16,255,0.046165332198143005
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,float16,255,0.05403733253479004
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,float16,1023,0.0476746658484141
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,fp8,255,0.04189866781234741
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,fp8,255,0.04990399877230326
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,float16,511,0.04763199885686239
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,fp8,511,0.041536000867684685
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,fp8,511,0.07462400197982788
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,float16,1023,0.12980799873669943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,fp8,1023,0.041749333341916404
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,fp8,1023,0.12588266531626383
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,float16,2047,0.04754666487375895
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,float16,2047,0.23415466149648032
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,fp8,2047,0.04194133480389913
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,float16,4095,0.04783466458320618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,fp8,2047,0.22803199291229248
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,64,128,1,float16,fp8,4095,0.041738669077555336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,float16,4095,0.4392799933751424
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,64,128,1,float16,float16,1,0.0373333344856898
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,64,0,1,float16,float16,1,0.03757333258787791
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,64,0,1,float16,fp8,4095,0.4325439929962158
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,64,128,1,float16,fp8,1,0.03175999969244003
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,64,0,1,float16,fp8,1,0.03161599983771642
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,64,128,1,float16,float16,3,0.037392000357309975
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,64,0,1,float16,float16,3,0.03746666759252548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,64,0,1,float16,fp8,7,0.03356799980004629
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,64,128,1,float16,fp8,3,0.031445334355036415
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,64,0,1,float16,float16,15,0.03950933367013931
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,64,128,1,float16,fp8,15,0.033589333295822144
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,64,0,1,float16,fp8,3,0.03215999901294708
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,64,128,1,float16,float16,7,0.037733333806196846
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,64,128,1,float16,float16,31,0.04584533472855886
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,64,0,1,float16,float16,7,0.03790933390458425
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,64,128,1,float16,fp8,7,0.03340800106525421
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,64,128,1,float16,float16,15,0.0394400010506312
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,64,0,1,float16,fp8,15,0.033615998923778534
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,64,0,1,float16,float16,31,0.04595200220743815
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,64,128,1,float16,fp8,31,0.041738669077555336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,64,0,1,float16,fp8,31,0.04151466737190882
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,64,128,1,float16,float16,63,0.04764799773693085
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,64,0,1,float16,float16,63,0.04582933088143667
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,64,128,1,float16,fp8,63,0.04161600023508072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,64,0,1,float16,fp8,63,0.04161066561937332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,64,128,1,float16,float16,255,0.046575998266537987
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,64,128,1,float16,float16,127,0.04770133395989736
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,64,0,1,float16,float16,127,0.04758933186531067
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,64,128,1,float16,fp8,127,0.04182399809360504
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,64,0,1,float16,fp8,127,0.04187199970086416
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,64,0,1,float16,float16,255,0.05526400109132131
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,64,128,1,float16,fp8,255,0.041840001940727234
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,64,0,1,float16,fp8,255,0.04974933465321859
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,64,128,1,float16,float16,511,0.04790399968624115
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,64,0,1,float16,float16,511,0.08074666559696198
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,64,128,1,float16,fp8,511,0.041759997606277466
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,64,0,1,float16,fp8,1023,0.1258080005645752
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,64,0,1,float16,fp8,511,0.07534400125344594
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,64,128,1,float16,float16,1023,0.04790399968624115
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,64,128,1,float16,fp8,2047,0.04177600145339966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,64,0,1,float16,float16,1023,0.13014400005340576
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,64,128,1,float16,fp8,1023,0.0415040006240209
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,64,128,1,float16,float16,2047,0.04785599807898203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,64,0,1,float16,float16,2047,0.2339093287785848
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,64,0,1,float16,fp8,2047,0.22821333010991415
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,64,128,1,float16,float16,4095,0.04775466521581014
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,64,128,1,float16,fp8,4095,0.041759997606277466
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,64,0,1,float16,float16,4095,0.44644800821940106
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,float16,1,0.012784000486135483
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,float16,1,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,fp8,1,0.012831999609867731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,64,0,1,float16,fp8,4095,0.43594666322072345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,fp8,1,0.012869333227475485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,float16,3,0.012794667234023413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,fp8,7,0.012896000097195307
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,float16,3,0.012815999488035837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,fp8,3,0.012800000607967377
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,fp8,3,0.012837332983811697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,fp8,15,0.012778667112191519
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,float16,7,0.012666666259368261
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,float16,31,0.012949333836634954
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,float16,7,0.012213333199421564
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,fp8,7,0.012831999609867731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,float16,15,0.012576000144084295
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,float16,63,0.01240533341964086
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,float16,15,0.012896000097195307
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,fp8,15,0.013077333569526672
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,float16,31,0.012741333494583765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,float16,127,0.012831999609867731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,fp8,31,0.012869333227475485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,fp8,31,0.012879999975363413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,float16,63,0.012597333639860153
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,fp8,63,0.012928000340859095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,fp8,63,0.012421333541472753
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,float16,127,0.012752000242471695
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,fp8,255,0.012778667112191519
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,fp8,127,0.012815999488035837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,fp8,127,0.012831999609867731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,float16,255,0.012826666235923767
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,float16,255,0.012917333592971167
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,fp8,255,0.012928000340859095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,float16,1023,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,float16,511,0.012906666845083237
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,float16,511,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,fp8,511,0.012874666601419449
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,fp8,511,0.013658666362365087
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,float16,1023,0.012794667234023413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,fp8,1023,0.013167999684810638
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,fp8,1023,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,float16,2047,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,fp8,4095,0.015034666905800501
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,float16,2047,0.02740799884001414
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,float16,1,0.008752000207702318
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,fp8,2047,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,fp8,2047,0.02517866591612498
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,64,128,1,float16,float16,4095,0.014789332946141561
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,float16,4095,0.04374399781227112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,64,0,1,float16,fp8,4095,0.037104000647862755
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,float16,1,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,fp8,1,0.00897066667675972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,fp8,1,0.009050666665037474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,float16,3,0.008943999807039896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,float16,3,0.00879466657837232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,fp8,3,0.008965333302815756
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,fp8,3,0.009093333035707474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,float16,7,0.008661333471536636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,float16,7,0.008703999842206636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,fp8,7,0.008901333436369896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,fp8,7,0.009152000149091085
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,float16,15,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,float16,15,0.008746666833758354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,fp8,15,0.00895999992887179
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,fp8,15,0.008986666798591614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,float16,31,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,float16,31,0.00874133345981439
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,fp8,31,0.010298666854699453
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,fp8,31,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,float16,127,0.00901333304742972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,float16,63,0.009008000294367472
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,float16,63,0.008810666700204214
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,fp8,63,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,fp8,63,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,float16,127,0.008901333436369896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,fp8,127,0.012874666601419449
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,fp8,127,0.012986666212479273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,float16,255,0.009114666531483332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,float16,255,0.008912000184257826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,fp8,255,0.012826666235923767
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,fp8,255,0.012831999609867731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,float16,511,0.019178666174411774
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,float16,511,0.019381333142518997
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,fp8,511,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,fp8,511,0.012944000462690989
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,float16,2047,0.019189332922299702
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,float16,1023,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,float16,1023,0.023365333676338196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,fp8,1023,0.023050665855407715
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,fp8,1023,0.027301333844661713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,float16,2047,0.02959999938805898
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,fp8,2047,0.02327999969323476
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,fp8,2047,0.031541332602500916
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,float16,4095,0.018933333456516266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,float16,4095,0.041477332512537636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,fp8,4095,0.023013333479563396
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,fp8,4095,0.041482667128245033
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,float16,8191,0.019258666783571243
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,float16,8191,0.06410133341948192
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,fp8,8191,0.023290666441122692
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,fp8,8191,0.06032533446947733
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,float16,16383,0.018810667097568512
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,float16,16383,0.10937600334485371
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,64,128,1,float16,fp8,16383,0.022986667851607006
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,float16,1,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,64,0,1,float16,fp8,16383,0.09890666604042053
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,float16,1,0.008949333180983862
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,fp8,1,0.009466666728258133
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,fp8,1,0.00914666677514712
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,float16,3,0.008682666967312494
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,float16,3,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,fp8,3,0.00898133342464765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,fp8,3,0.008922666932145754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,float16,7,0.009130666653315226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,float16,7,0.008709333216150602
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,fp8,7,0.009061333412925402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,fp8,7,0.00916800027092298
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,float16,15,0.00867733359336853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,float16,15,0.008639999975760778
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,fp8,15,0.008949333180983862
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,fp8,15,0.009039999917149544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,float16,31,0.008736000085870424
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,float16,31,0.008965333302815756
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,fp8,31,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,fp8,31,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,float16,63,0.008922666932145754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,float16,63,0.009082666908701261
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,fp8,63,0.01098666712641716
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,fp8,63,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,float16,127,0.00919999989370505
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,float16,127,0.008885333314538002
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,fp8,127,0.012826666235923767
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,fp8,127,0.012826666235923767
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,float16,255,0.00897066667675972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,float16,255,0.009008000294367472
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,fp8,255,0.012800000607967377
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,float16,1023,0.019354666272799175
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,fp8,255,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,float16,511,0.01894933357834816
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,float16,511,0.019909333437681198
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,fp8,511,0.012981332838535309
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,float16,2047,0.029146666328112285
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,fp8,2047,0.023039999107519787
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,fp8,511,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,float16,1023,0.02330133318901062
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,fp8,1023,0.02330133318901062
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,fp8,1023,0.027189334233601887
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,fp8,4095,0.04141866664091746
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,float16,2047,0.018911999960740406
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,float16,8191,0.06414933502674103
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,fp8,2047,0.03130666663249334
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,float16,4095,0.019013332823912304
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,float16,4095,0.04141333450873693
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,fp8,4095,0.023311999936898548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,float16,8191,0.01903466631968816
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,fp8,8191,0.0230880007147789
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,fp8,8191,0.0602400004863739
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,float16,16383,0.019066666563351948
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,float16,16383,0.11044266819953918
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,64,128,1,float16,fp8,16383,0.023045333723227184
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,float16,1,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,64,0,1,float16,fp8,16383,0.09921066959698994
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,float16,1,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,fp8,1,0.010101333260536194
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,fp8,1,0.010597333312034607
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,float16,3,0.010645333677530289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,float16,3,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,fp8,3,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,fp8,3,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,float16,7,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,float16,7,0.00898133342464765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,fp8,7,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,fp8,7,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,float16,15,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,float16,15,0.009077333534757296
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,fp8,15,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,fp8,15,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,float16,31,0.00898133342464765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,float16,31,0.009642666826645533
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,fp8,31,0.010634666929642359
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,fp8,31,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,float16,63,0.00903466654320558
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,float16,63,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,fp8,63,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,fp8,63,0.009786666681369146
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,float16,127,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,float16,127,0.009408000235756239
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,fp8,127,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,fp8,127,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,float16,255,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,float16,255,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,fp8,255,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,fp8,255,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,float16,511,0.009445333232482275
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,float16,511,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,fp8,511,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,fp8,511,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,float16,1023,0.010911999891201654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,float16,2047,0.01109333336353302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,fp8,2047,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,float16,1023,0.010933333386977514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,fp8,1023,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,fp8,1023,0.01089599976936976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,float16,2047,0.010650667051474253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,fp8,2047,0.010885333021481832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,float16,8191,0.015247999380032221
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,fp8,8191,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,float16,4095,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,float16,4095,0.014576000471909841
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,fp8,4095,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,fp8,4095,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,float16,8191,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,fp8,8191,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,float16,16383,0.010885333021481832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,float16,16383,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,64,128,1,float16,fp8,16383,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,float16,3,0.013093333691358566
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,64,0,1,float16,fp8,16383,0.017008000363906223
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,float16,1,0.012842666357755661
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,float16,1,0.013130666067202887
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,fp8,1,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,fp8,1,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,float16,3,0.013199999928474426
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,fp8,3,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,fp8,3,0.016143999993801117
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,float16,7,0.012975999464591345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,float16,7,0.012938667088747025
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,fp8,7,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,fp8,7,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,float16,15,0.012773333738247553
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,float16,15,0.013141332815090815
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,fp8,15,0.014965333044528961
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,fp8,15,0.015018666783968607
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,float16,31,0.01314666618903478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,float16,31,0.013237333546082178
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,fp8,31,0.015237333873907724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,fp8,31,0.015050667027632395
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,float16,63,0.01481066644191742
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,float16,63,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,fp8,127,0.021210665504137676
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,fp8,63,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,fp8,63,0.01718933383623759
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,float16,127,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,float16,127,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,fp8,127,0.02102400114138921
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,float16,255,0.014837333311637243
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,float16,255,0.014901333798964819
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,fp8,255,0.02093333254257838
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,fp8,255,0.020986666282018025
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,float16,511,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,float16,511,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,fp8,511,0.02102400114138921
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,fp8,511,0.020949333906173706
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,float16,1023,0.014826666563749313
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,float16,1023,0.02309333284695943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,fp8,1023,0.02091199904680252
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,fp8,1023,0.023365333676338196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,float16,2047,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,float16,2047,0.03331733246644338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,fp8,4095,0.052095999320348106
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,fp8,2047,0.020992000897725422
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,fp8,2047,0.0335359995563825
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,float16,4095,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,float16,4095,0.05359466870625814
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,fp8,4095,0.02092266579469045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,float16,8191,0.01532799998919169
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,float16,8191,0.09485333164532979
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,fp8,8191,0.02094399929046631
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,fp8,8191,0.09073600172996521
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,float16,16383,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,float16,16383,0.17681066195170084
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,64,128,1,float16,fp8,16383,0.020928000410397846
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,float16,1,0.013088000317414602
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,64,0,1,float16,fp8,16383,0.16572800278663635
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,float16,1,0.012890666723251343
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,fp8,1,0.01498666654030482
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,fp8,1,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,float16,3,0.012874666601419449
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,float16,3,0.012885333349307379
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,fp8,3,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,fp8,3,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,float16,7,0.012896000097195307
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,float16,7,0.01303999995191892
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,fp8,7,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,fp8,7,0.01523200049996376
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,float16,31,0.013157332936922709
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,float16,15,0.013104000439246496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,float16,15,0.012928000340859095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,fp8,31,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,fp8,15,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,fp8,15,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,float16,31,0.012853333105643591
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,fp8,31,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,float16,63,0.014912000546852747
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,float16,63,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,fp8,63,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,fp8,63,0.017114666601022083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,float16,127,0.014837333311637243
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,float16,127,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,fp8,127,0.0210506667693456
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,fp8,127,0.020975999534130096
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,float16,255,0.014815999815861383
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,float16,255,0.014815999815861383
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,fp8,255,0.020917333662509918
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,fp8,255,0.020997333029905956
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,float16,511,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,float16,511,0.017173333714405697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,fp8,511,0.02082666630546252
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,fp8,511,0.020960000654061634
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,float16,1023,0.01479999969402949
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,float16,1023,0.021365332106749218
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,fp8,1023,0.020954666038354237
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,float16,4095,0.015050667027632395
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,fp8,1023,0.02342933416366577
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,float16,2047,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,float16,2047,0.03331733246644338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,fp8,2047,0.02089600016673406
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,fp8,2047,0.03345066557327906
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,float16,4095,0.05376000205675761
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,fp8,8191,0.09090666969617207
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,fp8,4095,0.020960000654061634
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,fp8,4095,0.05222400029500326
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,float16,8191,0.01482133318980535
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,float16,8191,0.09481066465377808
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,fp8,8191,0.021045332153638203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,float16,16383,0.014975999792416891
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,float16,16383,0.17521067460378012
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,64,128,1,float16,fp8,16383,0.021066665649414062
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,float16,1,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,64,0,1,float16,fp8,16383,0.16479466358820596
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,float16,1,0.010640000303586325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,fp8,1,0.011066666493813196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,fp8,1,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,float16,3,0.009173333023985228
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,float16,3,0.00996800015370051
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,fp8,3,0.010640000303586325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,fp8,3,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,float16,7,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,float16,7,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,fp8,7,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,fp8,7,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,float16,15,0.009349333122372627
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,float16,15,0.010629333555698395
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,fp8,15,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,fp8,15,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,float16,31,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,float16,31,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,fp8,31,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,fp8,31,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,float16,63,0.009472000102202097
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,float16,63,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,fp8,63,0.010992000500361124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,fp8,63,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,float16,127,0.010591999938090643
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,float16,127,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,fp8,127,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,fp8,127,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,float16,255,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,float16,255,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,fp8,255,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,fp8,255,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,float16,511,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,float16,511,0.01101333275437355
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,fp8,511,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,fp8,511,0.011055999745925268
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,float16,1023,0.009872000043590864
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,float16,1023,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,fp8,1023,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,fp8,1023,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,float16,2047,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,float16,2047,0.012896000097195307
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,fp8,4095,0.01109333336353302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,fp8,2047,0.011066666493813196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,fp8,2047,0.013130666067202887
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,float16,8191,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,float16,4095,0.011077333241701126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,float16,4095,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,fp8,4095,0.015040000279744467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,float16,8191,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,fp8,8191,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,fp8,8191,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,float16,16383,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,float16,16383,0.021194666624069214
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,64,128,1,float16,fp8,16383,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,64,0,1,float16,fp8,1,0.0572213331858317
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,64,0,1,float16,fp8,16383,0.02021866664290428
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,64,0,1,float16,float16,3,0.06922666728496552
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,64,128,1,float16,float16,1,0.06841066479682922
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,64,0,1,float16,float16,1,0.06880533198515575
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,64,128,1,float16,fp8,1,0.05629866818586985
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,64,128,1,float16,float16,3,0.06902933120727539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,64,128,1,float16,fp8,3,0.05619200070699056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,64,0,1,float16,fp8,3,0.056128000219662987
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,64,128,1,float16,float16,7,0.07065600156784058
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,64,0,1,float16,float16,15,0.07221866647402446
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,64,0,1,float16,float16,7,0.07046400010585785
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,64,0,1,float16,fp8,15,0.06026133398214976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,64,128,1,float16,fp8,7,0.05818133552869161
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,64,0,1,float16,fp8,7,0.05896000067392985
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,64,128,1,float16,float16,15,0.07248533268769582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,64,128,1,float16,fp8,15,0.06027733286221822
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,64,128,1,float16,float16,63,0.0869706670443217
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,64,128,1,float16,fp8,63,0.0765226682027181
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,64,128,1,float16,float16,31,0.08661333719889323
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,64,0,1,float16,float16,31,0.08666132887204488
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,64,0,1,float16,float16,127,0.08689600229263306
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,64,128,1,float16,fp8,31,0.07649600009123485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,64,0,1,float16,fp8,31,0.0763733337322871
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,64,128,1,float16,float16,255,0.08827733000119527
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,64,0,1,float16,float16,63,0.0867733359336853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,64,0,1,float16,fp8,63,0.07668800155321757
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,64,128,1,float16,float16,127,0.08878399928410848
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,64,128,1,float16,fp8,127,0.07705066601435344
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,64,0,1,float16,fp8,127,0.0766186664501826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,64,0,1,float16,float16,255,0.10327466328938802
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,64,128,1,float16,fp8,255,0.07702933251857758
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,64,0,1,float16,fp8,255,0.09272533655166626
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,64,128,1,float16,float16,511,0.08865599830945332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,64,0,1,float16,float16,511,0.15411200126012167
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,64,128,1,float16,fp8,511,0.07838400204976399
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,64,0,1,float16,fp8,511,0.14215466380119324
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,64,128,1,float16,float16,1023,0.08889066179593404
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,64,0,1,float16,float16,1023,0.25281065702438354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,64,128,1,float16,fp8,1023,0.07690133154392242
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,64,0,1,float16,fp8,1023,0.2425546646118164
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,64,128,1,float16,float16,2047,0.08847467104593913
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,64,128,1,float16,fp8,2047,0.07860266665617625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,64,0,1,float16,fp8,1,0.05610666672388712
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,64,0,1,float16,float16,2047,0.4577920039494832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,64,128,1,float16,float16,1,0.06844800213972728
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,64,0,1,float16,float16,3,0.07017066578070323
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,64,0,1,float16,float16,1,0.06835733354091644
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,64,128,1,float16,float16,7,0.07076799869537354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,64,0,1,float16,fp8,2047,0.4455039898554484
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,64,128,1,float16,fp8,1,0.056143999099731445
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,64,128,1,float16,float16,3,0.06855466465155284
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,64,128,1,float16,fp8,3,0.05683733522891998
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,64,0,1,float16,float16,15,0.07250133156776428
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,64,128,1,float16,fp8,15,0.0601440022389094
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,64,0,1,float16,fp8,3,0.05602666735649109
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,64,0,1,float16,float16,7,0.07256533205509186
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,64,128,1,float16,fp8,7,0.058117335041364036
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,64,0,1,float16,fp8,7,0.05836800237496694
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,64,128,1,float16,float16,15,0.07252799967924754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,64,0,1,float16,fp8,15,0.06072533130645752
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,64,128,1,float16,float16,31,0.0865226686000824
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,64,128,1,float16,fp8,63,0.0766293356815974
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,64,0,1,float16,float16,31,0.0865226686000824
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,64,128,1,float16,fp8,31,0.07635200023651123
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,64,0,1,float16,fp8,31,0.07682133217652638
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,64,128,1,float16,float16,63,0.08685866991678874
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,64,0,1,float16,fp8,127,0.0765066643555959
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,64,0,1,float16,float16,63,0.08689600229263306
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,64,0,1,float16,fp8,63,0.07667733232180278
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,64,128,1,float16,fp8,255,0.07657066484292348
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,64,128,1,float16,float16,127,0.08716799815495808
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,64,0,1,float16,float16,127,0.0886346697807312
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,64,128,1,float16,fp8,127,0.07649066547552745
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,64,128,1,float16,float16,255,0.0886240005493164
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,64,0,1,float16,float16,255,0.10332799951235454
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,64,128,1,float16,float16,1023,0.08854933579762776
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,64,0,1,float16,fp8,255,0.0927946666876475
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,64,128,1,float16,float16,511,0.08874666690826416
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,64,0,1,float16,float16,511,0.1542080044746399
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,64,128,1,float16,fp8,511,0.07699200014273326
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,64,0,1,float16,fp8,511,0.14316800236701965
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,64,0,1,float16,float16,1023,0.2535146673520406
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,64,128,1,float16,fp8,1023,0.07656000057856242
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,64,0,1,float16,fp8,1023,0.24437866608301798
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,64,128,1,float16,float16,2047,0.08865066369374593
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,64,128,1,float16,fp8,2047,0.07755733529726665
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,64,0,1,float16,float16,2047,0.46566398938496906
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,64,128,1,float16,float16,1,0.01562133307258288
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,64,0,1,float16,float16,1,0.014885333677132925
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,64,0,1,float16,fp8,2047,0.4465706745783488
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,64,128,1,float16,fp8,1,0.01540800059835116
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,64,0,1,float16,fp8,3,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,64,0,1,float16,fp8,1,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,64,0,1,float16,float16,7,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,64,128,1,float16,float16,3,0.015562667200962702
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,64,0,1,float16,float16,3,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,64,128,1,float16,float16,15,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,64,128,1,float16,fp8,3,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,64,128,1,float16,fp8,15,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,64,128,1,float16,float16,7,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,64,128,1,float16,float16,31,0.015253332753976187
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,64,128,1,float16,fp8,7,0.015024000157912573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,64,0,1,float16,fp8,7,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,64,0,1,float16,float16,15,0.015461333096027374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,64,0,1,float16,fp8,15,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,64,0,1,float16,float16,63,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,64,0,1,float16,float16,31,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,64,0,1,float16,fp8,63,0.015290666371583939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,64,128,1,float16,fp8,31,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,64,0,1,float16,fp8,31,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,64,128,1,float16,float16,63,0.015247999380032221
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,64,128,1,float16,fp8,63,0.01584533353646596
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,64,128,1,float16,float16,255,0.015029333531856537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,64,128,1,float16,float16,127,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,64,0,1,float16,float16,127,0.014912000546852747
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,64,128,1,float16,fp8,127,0.015237333873907724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,64,0,1,float16,fp8,127,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,64,0,1,float16,float16,255,0.01492799942692121
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,64,128,1,float16,fp8,255,0.01588800052801768
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,64,0,1,float16,fp8,255,0.015893333901961643
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,64,0,1,float16,float16,1023,0.027210667729377747
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,64,128,1,float16,float16,511,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,64,0,1,float16,float16,511,0.019205333044131596
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,64,128,1,float16,fp8,511,0.016800000021855038
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,64,0,1,float16,fp8,511,0.019167999426523846
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,64,128,1,float16,float16,1023,0.016042667130629223
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,64,128,1,float16,fp8,1023,0.015781333049138386
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,64,0,1,float16,fp8,1023,0.02510400116443634
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,64,128,1,float16,float16,2047,0.01897066707412402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,64,0,1,float16,float16,2047,0.04436799883842468
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,64,128,1,float16,fp8,2047,0.017173333714405697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,64,0,1,float16,fp8,2047,0.03868266691764196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,float16,3,0.017173333714405697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,float16,1,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,float16,1,0.01725333308180173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,fp8,1,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,fp8,1,0.013530666629473368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,float16,3,0.017279999951521557
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,fp8,3,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,fp8,3,0.014762666076421738
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,float16,7,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,float16,7,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,fp8,7,0.013904000322024027
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,fp8,7,0.015754666179418564
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,float16,15,0.01722666621208191
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,float16,15,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,fp8,15,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,fp8,15,0.014901333798964819
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,float16,31,0.017909333109855652
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,float16,31,0.019013332823912304
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,fp8,31,0.01313599944114685
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,fp8,31,0.014741333822409311
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,float16,63,0.021013334393501282
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,float16,63,0.02107733239730199
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,fp8,127,0.015573333948850632
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,fp8,63,0.017024000485738117
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,fp8,63,0.01704000060757001
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,float16,127,0.020960000654061634
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,float16,127,0.021040000021457672
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,fp8,127,0.015290666371583939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,float16,255,0.02110933264096578
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,float16,255,0.021114667256673176
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,fp8,255,0.016528000434239704
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,float16,1023,0.021087999145189922
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,fp8,255,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,fp8,1023,0.01584533353646596
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,float16,511,0.0210506667693456
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,float16,511,0.023290666441122692
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,fp8,511,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,fp8,511,0.018986667195955913
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,fp8,2047,0.041349334021409355
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,float16,1023,0.03152533372243246
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,fp8,1023,0.025498665869235992
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,float16,2047,0.021141332884629566
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,float16,2047,0.04764266808827718
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,fp8,2047,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,float16,4095,0.021002667645613354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,float16,4095,0.0783733328183492
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,fp8,4095,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,fp8,4095,0.07017066578070323
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,fp8,8191,0.12782933314641318
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,float16,8191,0.021210665504137676
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,float16,8191,0.1399999956289927
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,fp8,8191,0.015578666081031164
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,float16,16383,0.021061333517233532
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,64,128,1,float16,fp8,16383,0.016154666741689045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,float16,16383,0.2626933256785075
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,fp8,1,0.014864000181357065
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,float16,1,0.017125333348910015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,float16,1,0.017242666333913803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,64,0,1,float16,fp8,16383,0.2444053292274475
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,fp8,1,0.014725333700577417
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,float16,3,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,float16,3,0.01724799970785777
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,fp8,3,0.014837333311637243
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,fp8,3,0.01492799942692121
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,float16,7,0.017173333714405697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,float16,7,0.017317333569129307
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,fp8,7,0.014741333822409311
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,fp8,7,0.014922666052977243
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,float16,31,0.017808000246683758
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,float16,15,0.01728533332546552
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,fp8,31,0.01479999969402949
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,float16,15,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,fp8,15,0.014783999572197596
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,fp8,15,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,float16,31,0.01903466631968816
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,fp8,31,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,float16,63,0.02109866589307785
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,float16,63,0.020986666282018025
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,fp8,63,0.015253332753976187
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,fp8,63,0.01703466723362605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,float16,127,0.02128533273935318
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,float16,127,0.020981334149837494
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,fp8,127,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,fp8,127,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,float16,255,0.02096533278624217
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,float16,255,0.021216000119845074
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,fp8,255,0.01695999999841054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,fp8,255,0.01676799977819125
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,float16,1023,0.03160000095764796
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,float16,511,0.02107733239730199
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,fp8,1023,0.025701334079106648
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,float16,511,0.02309866746266683
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,fp8,511,0.01704000060757001
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,fp8,511,0.01897066707412402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,float16,1023,0.021157334248224895
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,float16,4095,0.020960000654061634
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,fp8,1023,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,float16,2047,0.021087999145189922
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,fp8,4095,0.07012266914049785
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,float16,2047,0.04760533571243286
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,fp8,2047,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,float16,8191,0.13980266451835632
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,fp8,8191,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,fp8,2047,0.03976533313592275
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,float16,4095,0.0783679982026418
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,fp8,4095,0.0164533331990242
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,fp8,16383,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,float16,8191,0.020997333029905956
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,fp8,8191,0.12774399916330972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,64,128,1,float16,float16,16383,0.020986666282018025
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,float16,16383,0.26283733050028485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,float16,1,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,float16,1,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,64,0,1,float16,fp8,16383,0.24287466208140054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,fp8,1,0.01097600037852923
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,fp8,1,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,float16,3,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,float16,3,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,fp8,3,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,fp8,3,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,float16,7,0.009743999689817429
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,float16,7,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,fp8,7,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,float16,31,0.009701333319147428
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,fp8,7,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,float16,15,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,float16,15,0.009775999933481216
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,fp8,15,0.01101333275437355
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,fp8,15,0.01089599976936976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,float16,31,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,fp8,31,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,fp8,31,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,float16,63,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,float16,63,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,fp8,63,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,fp8,63,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,float16,127,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,float16,127,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,fp8,127,0.010965333630641302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,fp8,127,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,float16,255,0.011018666128317514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,fp8,511,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,fp8,511,0.011061333119869232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,float16,255,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,fp8,255,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,fp8,255,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,float16,511,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,float16,511,0.011018666128317514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,float16,1023,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,float16,1023,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,fp8,2047,0.013104000439246496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,fp8,1023,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,fp8,1023,0.011178666104873022
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,float16,2047,0.012896000097195307
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,float16,2047,0.014794666320085526
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,fp8,4095,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,fp8,2047,0.014943999548753103
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,float16,4095,0.011445333560307821
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,float16,4095,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,fp8,4095,0.012831999609867731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,float16,8191,0.012991999586423239
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,float16,8191,0.019253333409627277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,fp8,8191,0.012266666938861212
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,fp8,8191,0.018981333822011948
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,float16,16383,0.011141333729028702
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,64,128,1,float16,float16,1,0.13186132907867432
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,float16,16383,0.02714666724205017
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,64,128,1,float16,fp8,16383,0.012773333738247553
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,64,0,1,float16,fp8,16383,0.025226667523384094
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,64,0,1,float16,float16,1,0.13174399733543396
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,64,128,1,float16,fp8,1,0.1066986620426178
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,64,0,1,float16,fp8,1,0.10709866881370544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,64,128,1,float16,float16,3,0.1318986713886261
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,64,0,1,float16,float16,3,0.13165332873662314
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,64,128,1,float16,fp8,3,0.10713600118954976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,64,0,1,float16,fp8,3,0.10709866881370544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,64,128,1,float16,float16,7,0.13572266697883606
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,64,0,1,float16,float16,7,0.13578133781750998
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,64,128,1,float16,fp8,7,0.11117333173751831
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,64,0,1,float16,fp8,7,0.11137066284815471
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,64,128,1,float16,float16,15,0.13804800311724344
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,64,0,1,float16,float16,15,0.13860799868901572
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,64,128,1,float16,fp8,15,0.11529599626859029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,64,0,1,float16,fp8,15,0.11517866452534993
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,64,128,1,float16,float16,31,0.16665066281954447
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,64,0,1,float16,float16,31,0.16645333170890808
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,64,128,1,float16,fp8,31,0.14622933665911356
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,64,0,1,float16,fp8,31,0.14617600043614706
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,64,128,1,float16,float16,127,0.16879467169443765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,64,128,1,float16,float16,63,0.16782933473587036
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,64,0,1,float16,float16,63,0.16857065757115683
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,64,128,1,float16,fp8,63,0.14808533589045206
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,64,0,1,float16,fp8,63,0.1478506624698639
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,64,0,1,float16,float16,127,0.16879467169443765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,64,128,1,float16,fp8,127,0.14797332882881165
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,64,0,1,float16,fp8,255,0.1770240068435669
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,64,0,1,float16,fp8,127,0.14826132853825888
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,64,128,1,float16,float16,255,0.16883200407028198
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,64,0,1,float16,float16,255,0.19961599508921304
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,64,128,1,float16,fp8,255,0.1483573317527771
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,64,128,1,float16,float16,511,0.1705333391825358
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,64,0,1,float16,float16,511,0.2996319929758708
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,64,128,1,float16,fp8,511,0.14830399552981058
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,64,0,1,float16,fp8,511,0.27747732400894165
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,64,128,1,float16,float16,1023,0.17045332988103232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,64,0,1,float16,float16,1,0.13191999991734824
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,64,128,1,float16,fp8,1023,0.14803733428319296
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,64,0,1,float16,float16,1023,0.49798401196797687
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,64,128,1,float16,float16,1,0.13174399733543396
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,64,0,1,float16,fp8,1023,0.47833065191904706
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,64,128,1,float16,fp8,1,0.10733333230018616
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,64,0,1,float16,fp8,1,0.107013334830602
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,64,128,1,float16,float16,7,0.1360106666882833
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,64,128,1,float16,float16,3,0.13185600439707437
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,64,128,1,float16,fp8,7,0.11110933621724446
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,64,0,1,float16,float16,3,0.13201600313186646
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,64,128,1,float16,fp8,3,0.10738666852315266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,64,0,1,float16,fp8,3,0.10712533195813496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,64,0,1,float16,float16,7,0.13583466410636902
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,64,0,1,float16,fp8,7,0.11109333237012227
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,64,128,1,float16,float16,15,0.13802133003870645
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,64,0,1,float16,float16,15,0.13800000150998434
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,64,128,1,float16,fp8,15,0.11547199885050456
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,64,0,1,float16,fp8,15,0.11547199885050456
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,64,128,1,float16,float16,31,0.1664426624774933
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,64,0,1,float16,float16,31,0.16658666729927063
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,64,128,1,float16,fp8,31,0.14620799819628397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,64,0,1,float16,fp8,31,0.1460640033086141
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,64,128,1,float16,float16,63,0.1684053341547648
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,64,0,1,float16,float16,63,0.16845866044362387
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,64,128,1,float16,fp8,127,0.14813333749771118
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,64,128,1,float16,fp8,63,0.14805866281191507
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,64,128,1,float16,float16,255,0.16875199476877847
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,64,0,1,float16,fp8,63,0.14798399806022644
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,64,128,1,float16,float16,127,0.16897066434224448
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,64,0,1,float16,float16,127,0.168938676516215
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,64,0,1,float16,fp8,127,0.14822933077812195
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,64,0,1,float16,float16,255,0.19914666811625162
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,64,128,1,float16,fp8,255,0.14847999811172485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,64,0,1,float16,fp8,255,0.17869333426157633
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,64,128,1,float16,float16,511,0.17015467087427774
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,64,0,1,float16,float16,511,0.29946666955947876
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,64,128,1,float16,fp8,511,0.1483519971370697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,64,0,1,float16,fp8,511,0.2792159914970398
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,64,128,1,float16,float16,1023,0.17027199268341064
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,64,0,1,float16,fp8,1023,0.4782506624857585
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,64,128,1,float16,fp8,1023,0.14833600322405496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,64,0,1,float16,float16,1023,0.5096160173416138
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,64,128,1,float16,float16,3,0.021194666624069214
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,64,128,1,float16,float16,1,0.021045332153638203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,64,0,1,float16,float16,1,0.021338666478792827
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,64,128,1,float16,fp8,1,0.02109866589307785
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,64,0,1,float16,fp8,1,0.02102400114138921
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,64,0,1,float16,float16,3,0.02109866589307785
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,64,128,1,float16,fp8,3,0.020351999749739964
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,64,0,1,float16,fp8,3,0.020949333906173706
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,64,128,1,float16,float16,7,0.02128533273935318
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,64,0,1,float16,float16,15,0.021141332884629566
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,64,0,1,float16,float16,7,0.021407999098300934
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,64,128,1,float16,fp8,7,0.021055998901526134
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,64,0,1,float16,fp8,7,0.021173333128293354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,64,128,1,float16,float16,15,0.02109866589307785
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,64,128,1,float16,fp8,15,0.020928000410397846
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,64,0,1,float16,fp8,31,0.02109866589307785
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,64,0,1,float16,fp8,15,0.020853333175182343
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,64,128,1,float16,float16,31,0.020997333029905956
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,64,0,1,float16,float16,31,0.021210665504137676
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,64,128,1,float16,fp8,31,0.02109333376089732
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,64,128,1,float16,float16,63,0.021029333273569744
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,64,0,1,float16,float16,63,0.021333334346612293
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,64,128,1,float16,fp8,63,0.021002667645613354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,64,0,1,float16,fp8,63,0.020879998803138733
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,64,128,1,float16,float16,127,0.02094399929046631
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,64,0,1,float16,float16,127,0.021007999777793884
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,64,128,1,float16,fp8,127,0.021002667645613354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,64,128,1,float16,float16,511,0.021205333371957142
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,64,0,1,float16,fp8,127,0.020869334538777668
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,64,128,1,float16,float16,255,0.021066665649414062
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,64,0,1,float16,float16,255,0.021066665649414062
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,64,128,1,float16,fp8,255,0.02093333254257838
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,64,0,1,float16,fp8,255,0.019573333362738293
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,64,0,1,float16,float16,511,0.029109333952267964
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,64,128,1,float16,fp8,511,0.021322667598724365
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,64,0,1,float16,fp8,511,0.027045334378878277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,64,128,1,float16,float16,1023,0.020954666038354237
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,64,0,1,float16,float16,1023,0.041893333196640015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,64,128,1,float16,fp8,1023,0.021114667256673176
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,64,0,1,float16,fp8,1023,0.03597866743803024
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,1,64,128,1,float16,float16,1,0.2568906744321187
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,1,64,0,1,float16,float16,1,0.2566453417142232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,1,64,128,1,float16,fp8,1,0.20548800627390543
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,1,64,0,1,float16,fp8,1,0.20560000340143839
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,1,64,128,1,float16,float16,3,0.25682665904362995
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,1,64,0,1,float16,float16,3,0.2566933234532674
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,1,64,128,1,float16,fp8,3,0.20559465885162354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,1,64,0,1,float16,fp8,3,0.20564266045888266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,1,64,128,1,float16,float16,7,0.26498667399088544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,1,64,0,1,float16,float16,7,0.2652053236961365
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,1,64,128,1,float16,fp8,7,0.21577600638071695
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,1,64,0,1,float16,fp8,7,0.21388266483942667
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,1,64,128,1,float16,float16,15,0.2707680066426595
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,1,64,0,1,float16,float16,15,0.2710026701291402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,1,64,128,1,float16,fp8,15,0.223797341187795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,1,64,0,1,float16,fp8,15,0.22382932901382446
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,1,64,128,1,float16,float16,31,0.3261973261833191
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,1,64,0,1,float16,float16,31,0.3266826669375102
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,1,64,128,1,float16,fp8,31,0.2874773343404134
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,1,64,0,1,float16,fp8,31,0.28728532791137695
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,1,64,128,1,float16,float16,63,0.3285173376401265
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,1,64,0,1,float16,float16,63,0.32893333832422894
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,1,64,128,1,float16,fp8,63,0.28775999943415326
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,1,64,0,1,float16,fp8,63,0.2877013285954793
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,1,64,128,1,float16,float16,127,0.33063467343648273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,1,64,0,1,float16,float16,127,0.33241067330042523
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,1,64,128,1,float16,fp8,127,0.28993600606918335
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,1,64,0,1,float16,fp8,127,0.2895946701367696
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,1,64,128,1,float16,float16,255,0.33268266916275024
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,1,64,0,1,float16,float16,255,0.3938986857732137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,1,64,128,1,float16,fp8,255,0.28973867495854694
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,1,64,0,1,float16,fp8,255,0.3487573464711507
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,4,64,128,1,float16,float16,1,0.25684799750645954
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,4,64,0,1,float16,float16,1,0.2569173375765483
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,4,64,128,1,float16,fp8,1,0.20762133598327637
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,4,64,0,1,float16,fp8,1,0.2075786590576172
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,4,64,128,1,float16,float16,3,0.25684799750645954
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,4,64,0,1,float16,float16,3,0.25838400920232135
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,4,64,128,1,float16,fp8,3,0.20777066548665366
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,4,64,0,1,float16,fp8,3,0.20747733116149902
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,4,64,128,1,float16,float16,7,0.2651946743329366
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,4,64,0,1,float16,float16,7,0.2656853397687276
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,4,64,0,1,float16,float16,15,0.27105067173639935
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,4,64,128,1,float16,fp8,7,0.2156213323275248
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,4,64,0,1,float16,fp8,7,0.21559999386469522
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,4,64,128,1,float16,float16,15,0.2711679935455322
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,4,64,128,1,float16,fp8,15,0.22393600145975748
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,4,64,0,1,float16,fp8,15,0.2237493395805359
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,4,64,128,1,float16,float16,31,0.3264960050582886
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,4,64,0,1,float16,float16,31,0.3264373342196147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,4,64,0,1,float16,float16,63,0.3288480043411255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,4,64,128,1,float16,fp8,31,0.2873386740684509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,4,64,0,1,float16,fp8,31,0.28757866223653156
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,4,64,128,1,float16,float16,63,0.32891732454299927
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,4,64,128,1,float16,fp8,63,0.2890666723251343
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,4,64,128,1,float16,fp8,127,0.2895946701367696
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,4,64,0,1,float16,fp8,63,0.2882879972457886
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,4,64,128,1,float16,float16,127,0.33237866560618085
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,4,64,0,1,float16,float16,127,0.3325226704279582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,4,64,0,1,float16,fp8,127,0.29047467311223346
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,4,64,128,1,float16,float16,255,0.33238933483759564
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,4,64,0,1,float16,float16,255,0.3923413356145223
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,4,64,128,1,float16,fp8,255,0.28962133328119916
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,8,64,128,1,float16,float16,1,0.033344000577926636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,4,64,0,1,float16,fp8,255,0.34869333108266193
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,8,64,0,1,float16,float16,1,0.03339199970165888
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,8,64,128,1,float16,fp8,1,0.031231999397277832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,8,64,0,1,float16,fp8,1,0.03147733211517334
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,8,64,128,1,float16,float16,3,0.0332640012105306
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,8,64,0,1,float16,float16,3,0.033439998825391136
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,8,64,128,1,float16,fp8,3,0.03133333226044973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,8,64,128,1,float16,fp8,7,0.032933334509531655
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,8,64,0,1,float16,fp8,3,0.03151999910672506
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,8,64,128,1,float16,float16,7,0.03359466542800268
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,8,64,0,1,float16,float16,15,0.0332640012105306
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,8,64,0,1,float16,float16,7,0.033226666351159416
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,8,64,0,1,float16,fp8,7,0.031658666829268135
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,8,64,128,1,float16,float16,15,0.03322133421897888
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,8,64,128,1,float16,fp8,15,0.03166933357715607
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,8,64,0,1,float16,fp8,15,0.03136533250411352
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,8,64,0,1,float16,fp8,31,0.031199999153614044
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,8,64,128,1,float16,float16,31,0.03329066683848699
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,8,64,0,1,float16,float16,31,0.03331733246644338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,8,64,128,1,float16,fp8,31,0.03141333411137263
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,8,64,128,1,float16,float16,63,0.033359999457995095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,8,64,0,1,float16,float16,63,0.03355200091997782
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,8,64,128,1,float16,fp8,63,0.03156266609827677
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,8,64,0,1,float16,fp8,63,0.03145066648721695
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,8,64,128,1,float16,float16,127,0.03329599897066752
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,8,64,0,1,float16,float16,127,0.03334933271010717
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,8,64,128,1,float16,fp8,127,0.03146666785081228
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,8,64,0,1,float16,fp8,127,0.031290667752424874
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,8,64,128,1,float16,float16,255,0.03363733241955439
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,8,64,0,1,float16,float16,255,0.03362133353948593
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,8,64,128,1,float16,fp8,255,0.03339199970165888
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,8,64,0,1,float16,fp8,255,0.03199466566244761
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,1,64,128,1,float16,float16,1,0.5070240100224813
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,1,64,0,1,float16,float16,1,0.5070773363113403
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,1,64,128,1,float16,fp8,1,0.4031519889831543
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,1,64,0,1,float16,fp8,1,0.4046613375345866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,1,64,128,1,float16,float16,3,0.5066560109456381
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,1,64,128,1,float16,fp8,3,0.4039306640625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,1,64,0,1,float16,float16,3,0.5068906545639038
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,1,64,0,1,float16,fp8,3,0.4042400121688843
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,1,64,128,1,float16,float16,7,0.5226666529973348
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,1,64,0,1,float16,float16,7,0.5232746601104736
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,1,64,128,1,float16,fp8,7,0.4227199951807658
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,1,64,0,1,float16,fp8,7,0.4227733214696248
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,1,64,128,1,float16,float16,15,0.5350773334503174
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,1,64,0,1,float16,float16,15,0.5350346565246582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,1,64,128,1,float16,fp8,15,0.44123733043670654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,1,64,0,1,float16,fp8,15,0.44103999932607013
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,1,64,128,1,float16,float16,31,0.6475199858347574
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,1,64,0,1,float16,float16,31,0.6478933493296305
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,1,64,128,1,float16,fp8,31,0.5661760171254476
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,1,64,0,1,float16,fp8,31,0.5661760171254476
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,1,64,128,1,float16,float16,63,0.6516106526056925
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,1,64,0,1,float16,float16,63,0.6517173449198405
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,1,64,128,1,float16,fp8,63,0.5720533529917399
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,1,64,0,1,float16,fp8,63,0.5704960028330485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,1,64,128,1,float16,float16,127,0.6574186484018961
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,1,64,0,1,float16,float16,127,0.6578933397928873
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,1,64,128,1,float16,fp8,127,0.573909322420756
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,1,64,0,1,float16,fp8,127,0.5738453467686971
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,4,64,128,1,float16,float16,1,0.5089813470840454
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,4,64,0,1,float16,fp8,1,0.40933334827423096
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,4,64,0,1,float16,float16,1,0.5083039999008179
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,4,64,128,1,float16,float16,3,0.5086613496144613
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,4,64,128,1,float16,fp8,1,0.40984535217285156
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,4,64,0,1,float16,float16,3,0.5092000166575114
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,4,64,128,1,float16,fp8,3,0.40917332967122394
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,4,64,0,1,float16,fp8,3,0.41012267271677655
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,4,64,128,1,float16,float16,7,0.5245920022328695
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,4,64,0,1,float16,float16,7,0.5236266851425171
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,4,64,128,1,float16,fp8,7,0.4243306716283162
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,4,64,0,1,float16,fp8,7,0.4243359963099162
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,4,64,128,1,float16,float16,15,0.5354719956715902
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,4,64,0,1,float16,float16,15,0.5354719956715902
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,4,64,128,1,float16,fp8,15,0.44167999426523846
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,4,64,0,1,float16,fp8,15,0.4428960084915161
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,4,64,128,1,float16,float16,31,0.6476106643676758
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,4,64,0,1,float16,float16,31,0.6459840138753256
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,4,64,128,1,float16,fp8,31,0.5671039819717407
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,4,64,0,1,float16,fp8,31,0.5675839980443319
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,4,64,128,1,float16,float16,63,0.6522719860076904
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,4,64,0,1,float16,float16,63,0.6521600087483724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,4,64,128,1,float16,fp8,63,0.5704533259073893
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,4,64,0,1,float16,fp8,63,0.5719253222147623
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,4,64,128,1,float16,float16,127,0.6579519907633463
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,8,64,128,1,float16,float16,1,0.05611200133959452
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,4,64,0,1,float16,float16,127,0.6576106548309326
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,8,64,128,1,float16,fp8,1,0.0537120004494985
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,4,64,128,1,float16,fp8,127,0.5742559830347697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,8,64,0,1,float16,float16,1,0.056618665655454
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,4,64,0,1,float16,fp8,127,0.5745120048522949
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,8,64,0,1,float16,fp8,1,0.05375466744105021
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,8,64,128,1,float16,float16,3,0.05590933561325073
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,8,64,0,1,float16,float16,3,0.05624000231424967
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,8,64,128,1,float16,fp8,3,0.05372266471385956
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,8,64,0,1,float16,fp8,3,0.05373333394527435
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,8,64,128,1,float16,float16,7,0.05589866638183594
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,8,64,0,1,float16,float16,7,0.05596266686916351
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,8,64,128,1,float16,fp8,7,0.05367999772230784
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,8,64,0,1,float16,fp8,7,0.053674668073654175
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,8,64,128,1,float16,float16,15,0.056048000852266945
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,8,64,0,1,float16,float16,15,0.05603733162085215
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,8,64,128,1,float16,float16,31,0.056128000219662987
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,8,64,128,1,float16,fp8,15,0.053786665201187134
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,8,64,0,1,float16,fp8,15,0.053616002202034
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,8,64,0,1,float16,float16,31,0.056128000219662987
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,8,64,128,1,float16,float16,63,0.05620799958705902
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,8,64,128,1,float16,fp8,31,0.05385066568851471
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,8,64,0,1,float16,fp8,31,0.05382933219273885
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,8,64,0,1,float16,float16,63,0.0561653325955073
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,8,64,128,1,float16,float16,127,0.057664001981417336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,8,64,0,1,float16,float16,127,0.05726400017738342
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,float16,1,0.013007999708255133
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,8,64,128,1,float16,fp8,63,0.05378133555253347
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,8,64,0,1,float16,fp8,63,0.05381333331267039
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,8,64,128,1,float16,fp8,127,0.05373866856098175
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,float16,1,0.01301866645614306
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,8,64,0,1,float16,fp8,127,0.05485333502292633
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,fp8,1,0.012901333471139273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,fp8,1,0.012842666357755661
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,float16,3,0.013066666821638743
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,float16,3,0.01322666679819425
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,fp8,3,0.013093333691358566
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,fp8,7,0.013173333058754602
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,fp8,3,0.013194666554530462
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,float16,7,0.01302933320403099
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,float16,7,0.013194666554530462
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,fp8,7,0.012853333105643591
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,float16,15,0.013093333691358566
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,float16,15,0.013248000293970108
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,fp8,15,0.013050666699806849
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,fp8,15,0.013066666821638743
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,float16,31,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,fp8,63,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,float16,31,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,fp8,31,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,fp8,31,0.014949332922697067
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,float16,63,0.016837333639462788
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,fp8,127,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,float16,63,0.01684800038735072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,fp8,63,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,float16,127,0.017045332739750545
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,float16,127,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,fp8,127,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,float16,255,0.017114666601022083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,float16,255,0.01933866615096728
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,fp8,255,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,fp8,255,0.018901333212852478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,float16,511,0.017071999609470367
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,float16,511,0.027232001225153606
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,fp8,511,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,float16,2047,0.01681600014368693
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,fp8,511,0.025434667865435284
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,float16,1023,0.01685333376129468
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,float16,1023,0.0417546679576238
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,fp8,1023,0.015509333461523056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,fp8,1023,0.04165866722663244
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,float16,2047,0.07252799967924754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,fp8,2047,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,fp8,2047,0.07050133248170216
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,float16,4095,0.017322666943073273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,float16,4095,0.13412266969680786
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,fp8,4095,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,fp8,4095,0.13195199767748514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,float16,8191,0.017125333348910015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,float16,8191,0.2567946712176005
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,fp8,8191,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,fp8,8191,0.2547360062599182
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,float16,16383,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,64,128,1,float16,fp8,16383,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,float16,1,0.013130666067202887
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,float16,16383,0.5226986805597941
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,float16,1,0.013162666310866674
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,fp8,1,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,fp8,1,0.012901333471139273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,64,0,1,float16,fp8,16383,0.5216480096181234
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,float16,3,0.01302933320403099
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,float16,3,0.013104000439246496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,fp8,3,0.013066666821638743
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,fp8,3,0.013130666067202887
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,float16,7,0.013845333208640417
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,float16,7,0.01309866706530253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,fp8,7,0.012991999586423239
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,fp8,7,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,float16,15,0.014767999450365702
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,float16,15,0.014954666296641031
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,fp8,15,0.013162666310866674
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,fp8,15,0.013050666699806849
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,float16,31,0.017968000223239262
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,float16,31,0.016656000167131424
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,fp8,31,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,fp8,31,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,float16,63,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,float16,63,0.01670933390657107
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,fp8,63,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,fp8,63,0.015263999501864115
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,float16,127,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,float16,127,0.017125333348910015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,fp8,127,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,fp8,127,0.015504000087579092
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,float16,255,0.01682666689157486
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,float16,255,0.019285333653291065
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,fp8,255,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,fp8,255,0.017258666455745697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,float16,511,0.01695999999841054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,float16,1023,0.041850666205088295
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,fp8,1023,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,float16,511,0.027130665878454845
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,float16,2047,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,fp8,511,0.01605333387851715
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,fp8,511,0.02515733242034912
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,float16,1023,0.01718933383623759
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,fp8,1023,0.041493333876132965
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,float16,2047,0.07323200007279713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,fp8,2047,0.01498666654030482
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,fp8,2047,0.07234666744867961
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,float16,4095,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,float16,4095,0.13410133123397827
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,fp8,4095,0.015034666905800501
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,fp8,4095,0.13182933131853738
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,float16,8191,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,fp8,8191,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,float16,8191,0.25681066513061523
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,fp8,8191,0.25525333484013873
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,float16,16383,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,64,128,1,float16,fp8,16383,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,float16,1,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,float16,1,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,float16,16383,0.5304640134175619
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,fp8,1,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,64,0,1,float16,fp8,16383,0.5238399902979533
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,fp8,1,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,float16,3,0.010981333752473196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,float16,3,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,fp8,3,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,fp8,3,0.010602666685978571
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,float16,7,0.01089599976936976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,float16,7,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,fp8,15,0.010656000425418219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,float16,31,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,fp8,7,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,fp8,7,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,float16,15,0.010885333021481832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,float16,15,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,fp8,15,0.010928000013033548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,float16,31,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,fp8,31,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,fp8,31,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,float16,63,0.010938666760921478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,float16,63,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,fp8,63,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,fp8,63,0.011018666128317514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,float16,127,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,float16,127,0.010890666395425797
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,fp8,127,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,fp8,127,0.01109333336353302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,float16,255,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,float16,255,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,fp8,255,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,fp8,255,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,float16,511,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,fp8,1023,0.01091733326514562
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,float16,511,0.011087999989589056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,fp8,511,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,fp8,511,0.012815999488035837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,float16,1023,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,fp8,2047,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,float16,1023,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,fp8,1023,0.012821332861979803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,float16,2047,0.012789333860079447
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,fp8,4095,0.018960000326236088
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,float16,2047,0.015872000406185787
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,fp8,2047,0.012778667112191519
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,float16,4095,0.013162666310866674
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,float16,4095,0.019760000209013622
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,fp8,4095,0.012746666868527731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,float16,8191,0.013114667187134424
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,float16,8191,0.027215999861558277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,fp8,8191,0.012960000584522883
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,fp8,8191,0.025066666305065155
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,float16,16383,0.012874666601419449
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,float16,16383,0.04450666904449463
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,64,128,1,float16,fp8,16383,0.012719999998807907
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,64,0,1,float16,fp8,16383,0.03618133316437403
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,1,64,128,1,float16,float16,1,1.00709867477417
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,1,64,128,1,float16,fp8,1,0.8004159927368164
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,1,64,0,1,float16,float16,1,1.006112019220988
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,1,64,0,1,float16,fp8,1,0.7999359766642252
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,1,64,128,1,float16,float16,3,1.0072533289591472
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,1,64,0,1,float16,float16,3,1.0070079962412517
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,1,64,128,1,float16,fp8,3,0.8032000064849854
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,1,64,0,1,float16,fp8,3,0.8016266822814941
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,1,64,128,1,float16,float16,7,1.0397120316823323
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,1,64,0,1,float16,float16,7,1.0386079947153728
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,1,64,128,1,float16,fp8,7,0.8395520051320394
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,1,64,0,1,float16,fp8,7,0.8397813638051351
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,1,64,128,1,float16,float16,15,1.0636533101399739
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,1,64,128,1,float16,fp8,15,0.8770399888356527
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,1,64,0,1,float16,float16,15,1.0640587011973064
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,1,64,0,1,float16,fp8,15,0.876751979192098
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,1,64,128,1,float16,float16,31,1.2875680128733318
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,1,64,0,1,float16,float16,31,1.2861119906107585
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,1,64,128,1,float16,fp8,31,1.1268959840138753
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,1,64,0,1,float16,fp8,31,1.1268906593322754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,1,64,128,1,float16,float16,63,1.2983307043711345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,1,64,0,1,float16,float16,63,1.297594706217448
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,1,64,128,1,float16,fp8,63,1.1363680362701416
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,1,64,0,1,float16,fp8,63,1.1360267003377278
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,4,64,128,1,float16,float16,1,1.0088106791178386
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,4,64,0,1,float16,float16,1,1.0105120340983074
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,4,64,128,1,float16,fp8,1,0.814634641011556
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,4,64,0,1,float16,fp8,1,0.8139893213907877
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,4,64,128,1,float16,float16,3,1.011034647623698
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,4,64,0,1,float16,float16,3,1.0106933116912842
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,4,64,128,1,float16,fp8,3,0.8139733473459879
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,4,64,0,1,float16,fp8,3,0.813642660776774
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,4,64,128,1,float16,float16,7,1.0425653457641602
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,4,64,0,1,float16,float16,7,1.04148268699646
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,4,64,128,1,float16,fp8,7,0.8423893451690674
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,4,64,0,1,float16,fp8,7,0.8419413566589355
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,4,64,128,1,float16,float16,15,1.0642399787902832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,4,64,0,1,float16,float16,15,1.0643786589304607
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,4,64,128,1,float16,fp8,15,0.8776426315307617
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,4,64,0,1,float16,fp8,15,0.8775466283162435
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,4,64,128,1,float16,float16,31,1.2882613341013591
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,4,64,0,1,float16,float16,31,1.2883573373158772
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,4,64,128,1,float16,fp8,31,1.128218650817871
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,4,64,0,1,float16,fp8,31,1.128117322921753
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,4,64,128,1,float16,float16,63,1.3015413284301758
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,4,64,0,1,float16,float16,63,1.2995306650797527
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,4,64,128,1,float16,fp8,63,1.1359679698944092
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,8,64,128,1,float16,float16,1,0.10530666510264079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,4,64,0,1,float16,fp8,63,1.135866641998291
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,8,64,0,1,float16,float16,1,0.10508267084757487
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,8,64,128,1,float16,fp8,1,0.09718933701515198
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,8,64,0,1,float16,fp8,1,0.09715732932090759
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,8,64,128,1,float16,float16,3,0.10500267148017883
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,8,64,0,1,float16,float16,3,0.10519466797510783
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,8,64,128,1,float16,fp8,3,0.09685867031415303
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,8,64,0,1,float16,fp8,3,0.09704533219337463
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,8,64,128,1,float16,float16,7,0.10528533657391866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,8,64,0,1,float16,float16,7,0.10467200477917989
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,8,64,128,1,float16,fp8,7,0.09754133224487305
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,8,64,0,1,float16,fp8,7,0.096778670946757
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,8,64,128,1,float16,float16,15,0.10435733199119568
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,8,64,0,1,float16,float16,15,0.10495466987291972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,8,64,128,1,float16,fp8,15,0.09706667065620422
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,8,64,0,1,float16,fp8,15,0.09700266520182292
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,8,64,128,1,float16,float16,31,0.10506666700045268
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,8,64,0,1,float16,float16,31,0.10497066378593445
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,8,64,128,1,float16,fp8,31,0.09757333000500996
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,8,64,0,1,float16,fp8,31,0.09672000010808308
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,8,64,128,1,float16,float16,63,0.10493866602579753
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,8,64,0,1,float16,float16,63,0.10499733686447144
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,8,64,128,1,float16,fp8,63,0.0969599982102712
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,8,64,0,1,float16,fp8,63,0.09681600332260132
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,1,64,128,1,float16,fp8,1,1.5913920402526855
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,1,64,128,1,float16,float16,1,2.005770683288574
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,1,64,0,1,float16,fp8,1,1.5946346918741863
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,1,64,0,1,float16,float16,1,2.006122589111328
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,1,64,128,1,float16,fp8,3,1.5925599733988445
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,1,64,128,1,float16,float16,3,2.006714661916097
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,1,64,0,1,float16,fp8,3,1.5931626955668132
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,1,64,0,1,float16,float16,3,2.0081547101338706
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,1,64,128,1,float16,float16,7,2.071674664815267
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,1,64,128,1,float16,fp8,7,1.6734026273091633
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,1,64,0,1,float16,float16,7,2.069935957590739
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,1,64,0,1,float16,fp8,7,1.6731360753377278
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,1,64,128,1,float16,float16,15,2.1220107078552246
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,1,64,128,1,float16,fp8,15,1.7471733093261719
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,1,64,0,1,float16,float16,15,2.121690591176351
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,1,64,0,1,float16,fp8,15,1.744874636332194
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,1,64,128,1,float16,float16,31,2.5702293713887534
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,1,64,128,1,float16,fp8,31,2.2465813954671225
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,1,64,0,1,float16,float16,31,2.56276798248291
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,1,64,0,1,float16,fp8,31,2.248506704966227
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,4,64,128,1,float16,float16,1,2.012506643931071
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,4,64,128,1,float16,fp8,1,1.622927983601888
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,4,64,0,1,float16,fp8,1,1.6243146260579426
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,4,64,0,1,float16,float16,1,2.013962745666504
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,4,64,128,1,float16,float16,3,2.0167360305786133
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,4,64,128,1,float16,fp8,3,1.6258026758829753
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,4,64,0,1,float16,fp8,3,1.6253867149353027
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,4,64,0,1,float16,float16,3,2.0171839396158853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,4,64,128,1,float16,float16,7,2.0826133092244468
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,4,64,128,1,float16,fp8,7,1.6790879567464192
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,4,64,0,1,float16,float16,7,2.080885410308838
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,4,64,0,1,float16,fp8,7,1.678928057352702
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,4,64,128,1,float16,fp8,15,1.7546826998392742
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,4,64,128,1,float16,float16,15,2.1702399253845215
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,4,64,0,1,float16,float16,15,2.16649595896403
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,4,64,0,1,float16,fp8,15,1.7525653839111328
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,4,64,128,1,float16,fp8,31,2.256373405456543
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,4,64,128,1,float16,float16,31,2.64900271097819
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,4,64,0,1,float16,fp8,31,2.2543840408325195
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,4,64,0,1,float16,float16,31,2.653989315032959
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,8,64,128,1,float16,float16,1,0.1989120046297709
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,8,64,0,1,float16,float16,1,0.19914132356643677
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,8,64,128,1,float16,fp8,1,0.18684266010920206
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,8,64,0,1,float16,fp8,1,0.18686934312184653
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,8,64,128,1,float16,float16,3,0.19928000370661417
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,8,64,0,1,float16,float16,3,0.19933332999547324
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,8,64,128,1,float16,fp8,3,0.18569600582122803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,8,64,0,1,float16,fp8,3,0.1862293283144633
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,8,64,128,1,float16,float16,7,0.19934399922688803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,8,64,0,1,float16,float16,7,0.199455996354421
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,8,64,128,1,float16,fp8,7,0.18674665689468384
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,8,64,0,1,float16,fp8,7,0.18621333440144858
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,8,64,128,1,float16,float16,15,0.19902400175730386
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,8,64,0,1,float16,float16,15,0.1992266575495402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,8,64,128,1,float16,fp8,15,0.1865760087966919
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,8,64,0,1,float16,fp8,15,0.18631466229756674
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,float16,1,0.023007998863856
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,8,64,128,1,float16,float16,31,0.19953066110610962
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,float16,1,0.023061332603295643
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,fp8,1,0.019029332945744198
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,fp8,1,0.018986667195955913
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,float16,3,0.023306667804718018
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,float16,3,0.023007998863856
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,fp8,3,0.018922666708628338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,fp8,3,0.018992000569899876
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,8,64,0,1,float16,float16,31,0.1994719902674357
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,float16,7,0.02325333406527837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,8,64,128,1,float16,fp8,31,0.18690133094787598
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,8,64,0,1,float16,fp8,31,0.18515199422836304
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,float16,7,0.023071999351183575
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,fp8,7,0.01933866615096728
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,fp8,7,0.01921066641807556
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,float16,15,0.02310933421055476
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,float16,15,0.02311466634273529
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,fp8,15,0.019365333020687103
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,fp8,15,0.01924266666173935
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,float16,31,0.026789332429567974
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,float16,31,0.027162666122118633
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,fp8,31,0.02475200096766154
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,fp8,31,0.023423999547958374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,float16,63,0.027098665634791057
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,float16,63,0.027141332626342773
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,fp8,63,0.02499733368555705
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,fp8,63,0.02513066679239273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,float16,127,0.02714666724205017
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,float16,127,0.02733866622050603
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,fp8,127,0.025226667523384094
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,float16,511,0.027087998886903126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,fp8,127,0.02422933280467987
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,float16,255,0.02714666724205017
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,float16,255,0.0313226655125618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,fp8,255,0.025040000677108765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,fp8,255,0.029232000311215717
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,float16,511,0.04557333389918009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,fp8,1023,0.06946133573849995
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,fp8,511,0.02513599892457326
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,fp8,511,0.041759997606277466
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,float16,1023,0.0271573339899381
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,float16,1023,0.07225066423416138
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,fp8,1023,0.0252960001428922
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,float16,2047,0.027237333357334137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,float16,2047,0.1255466639995575
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,fp8,2047,0.025008000433444977
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,float16,8191,0.027466667195161183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,fp8,2047,0.12345066666603088
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,float16,4095,0.027306665976842243
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,float16,4095,0.23609066009521484
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,fp8,4095,0.02517866591612498
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,fp8,4095,0.23233066002527872
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,64,128,1,float16,fp8,8191,0.025072000920772552
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,64,128,1,float16,float16,1,0.0230880007147789
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,float16,8191,0.4553920030593872
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,64,0,1,float16,float16,1,0.02186666677395503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,64,0,1,float16,fp8,8191,0.45077331860860187
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,64,128,1,float16,fp8,1,0.01905599981546402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,64,0,1,float16,fp8,1,0.01897066707412402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,64,128,1,float16,float16,3,0.021754667162895203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,64,0,1,float16,float16,3,0.021594665944576263
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,64,128,1,float16,fp8,3,0.019013332823912304
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,64,0,1,float16,fp8,3,0.01899733394384384
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,64,128,1,float16,float16,7,0.02327999969323476
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,64,0,1,float16,float16,7,0.023039999107519787
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,64,128,1,float16,fp8,7,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,64,0,1,float16,fp8,7,0.01926933353145917
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,64,128,1,float16,float16,15,0.02332266668478648
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,64,0,1,float16,float16,15,0.023061332603295643
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,64,128,1,float16,fp8,15,0.019296000401178997
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,64,0,1,float16,fp8,15,0.01926933353145917
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,64,128,1,float16,float16,31,0.026165333886941273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,64,0,1,float16,float16,31,0.026917333404223125
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,64,128,1,float16,fp8,31,0.023391999304294586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,64,0,1,float16,fp8,31,0.023413332800070446
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,64,128,1,float16,float16,63,0.027093333502610523
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,64,0,1,float16,float16,63,0.027114666998386383
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,64,128,1,float16,fp8,63,0.025066666305065155
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,64,0,1,float16,fp8,63,0.02426133304834366
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,64,128,1,float16,float16,127,0.027050666511058807
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,64,0,1,float16,float16,127,0.027280000348885853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,64,128,1,float16,fp8,127,0.025263999899228413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,64,0,1,float16,fp8,127,0.025125332176685333
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,64,128,1,float16,float16,255,0.027301333844661713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,64,0,1,float16,float16,255,0.031189332405726116
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,64,128,1,float16,fp8,255,0.025146665672461193
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,64,0,1,float16,fp8,511,0.04187199970086416
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,64,0,1,float16,fp8,255,0.028336000939210255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,64,128,1,float16,float16,511,0.027141332626342773
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,64,0,1,float16,float16,511,0.044250667095184326
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,64,128,1,float16,fp8,511,0.02499199906984965
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,64,128,1,float16,float16,1023,0.027295999228954315
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,64,0,1,float16,float16,1023,0.07232533395290375
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,64,128,1,float16,fp8,1023,0.025093334416548412
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,64,0,1,float16,fp8,1023,0.06881600121657054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,64,128,1,float16,float16,2047,0.027098665634791057
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,64,0,1,float16,float16,2047,0.12562666336695352
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,64,128,1,float16,fp8,2047,0.025413334369659424
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,64,0,1,float16,fp8,2047,0.12386133273442586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,64,0,1,float16,fp8,4095,0.23202667633692423
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,64,128,1,float16,float16,4095,0.02720000098148982
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,64,128,1,float16,fp8,4095,0.024351999163627625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,64,0,1,float16,float16,4095,0.23646400372187296
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,64,128,1,float16,float16,8191,0.027189334233601887
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,64,128,1,float16,fp8,8191,0.02514133354028066
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,float16,1,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,64,0,1,float16,float16,8191,0.459173321723938
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,float16,1,0.010602666685978571
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,64,0,1,float16,fp8,8191,0.4507519801457723
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,fp8,1,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,fp8,1,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,float16,3,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,float16,3,0.010928000013033548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,fp8,7,0.010981333752473196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,fp8,3,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,fp8,3,0.011018666128317514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,float16,7,0.011114666859308878
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,float16,7,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,fp8,7,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,float16,15,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,float16,15,0.010901333143313726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,fp8,31,0.010922666639089584
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,fp8,15,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,fp8,15,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,float16,31,0.010645333677530289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,fp8,63,0.010575999816258749
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,float16,31,0.01110400011142095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,fp8,31,0.010933333386977514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,float16,63,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,float16,63,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,fp8,63,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,float16,127,0.010901333143313726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,float16,127,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,fp8,127,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,fp8,127,0.010954666882753372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,float16,255,0.01097600037852923
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,float16,255,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,fp8,511,0.012800000607967377
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,fp8,255,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,fp8,255,0.011055999745925268
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,float16,511,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,fp8,1023,0.012789333860079447
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,float16,511,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,fp8,511,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,float16,1023,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,float16,1023,0.013834666460752487
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,fp8,1023,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,float16,2047,0.013088000317414602
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,float16,2047,0.01897066707412402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,fp8,2047,0.014352000008026758
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,fp8,2047,0.01754666616519292
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,float16,4095,0.012858666479587555
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,float16,4095,0.025445332129796345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,fp8,4095,0.013290667285521826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,fp8,4095,0.023290666441122692
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,float16,8191,0.013050666699806849
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,float16,8191,0.043493335445721946
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,64,128,1,float16,fp8,8191,0.013221333424250284
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,64,0,1,float16,fp8,8191,0.03534399966398875
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,float16,1,0.03549866626660029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,float16,15,0.0455626646677653
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,fp8,1,0.033386667569478355
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,float16,3,0.0374293327331543
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,fp8,3,0.03325333446264267
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,float16,7,0.03764266769091288
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,fp8,7,0.03401600072781245
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,fp8,15,0.04144000013669332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,fp8,127,0.04990933338801066
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,float16,255,0.08044800162315369
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,float16,31,0.045797333121299744
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,fp8,31,0.04178133110205332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,float16,63,0.045781334241231285
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,fp8,63,0.041509332756201424
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,float16,1023,0.23428267240524292
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,float16,127,0.05397866666316986
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,fp8,255,0.07430399954319
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,float16,511,0.13032533725102743
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,fp8,511,0.12442666292190552
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,fp8,1023,0.22380266586939493
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,float16,2047,0.44118932882944745
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,fp8,2047,0.42131201426188153
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,fp8,4095,0.8166986306508383
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,128,0,1,float16,float16,1,0.03570133447647095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,128,0,1,float16,float16,7,0.03754666695992152
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,128,0,1,float16,fp8,1,0.033301333586374916
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,128,0,1,float16,float16,3,0.037445334096749626
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,1,128,0,1,float16,float16,4095,0.8570079803466797
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,128,0,1,float16,fp8,3,0.033546666304270424
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,128,0,1,float16,fp8,7,0.033615998923778534
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,128,0,1,float16,float16,15,0.04587199787298838
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,128,0,1,float16,fp8,15,0.0414986660083135
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,128,0,1,float16,float16,127,0.053818667928377785
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,128,0,1,float16,fp8,127,0.0498933345079422
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,128,0,1,float16,float16,31,0.045594667394955955
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,128,0,1,float16,fp8,255,0.07444799939791362
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,128,0,1,float16,fp8,31,0.041840001940727234
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,128,0,1,float16,float16,63,0.04558933277924856
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,128,0,1,float16,fp8,63,0.04182399809360504
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,128,0,1,float16,float16,255,0.0804319977760315
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,128,0,1,float16,fp8,1023,0.22383999824523926
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,128,0,1,float16,float16,511,0.13051199913024902
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,128,0,1,float16,fp8,511,0.12370666861534119
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,128,0,1,float16,float16,1023,0.2339466611544291
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,float16,1,0.01312000056107839
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,fp8,1,0.012794667234023413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,128,0,1,float16,float16,2047,0.44734398523966473
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,128,0,1,float16,fp8,2047,0.42268800735473633
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,float16,3,0.01322666679819425
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,128,0,1,float16,float16,4095,0.8792959849039713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,fp8,3,0.013088000317414602
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,4,128,0,1,float16,fp8,4095,0.826848030090332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,float16,31,0.013066666821638743
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,fp8,31,0.012821332861979803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,float16,7,0.012725333372751871
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,fp8,63,0.013189333180586496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,fp8,7,0.013077333569526672
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,float16,15,0.01313599944114685
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,fp8,15,0.012815999488035837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,float16,63,0.01293333371480306
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,fp8,511,0.01525866612792015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,float16,127,0.012965332716703415
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,fp8,127,0.012896000097195307
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,float16,255,0.013082666943470636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,fp8,255,0.012842666357755661
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,float16,511,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,float16,1023,0.01924266666173935
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,fp8,1023,0.018981333822011948
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,float16,2047,0.039018665750821434
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,fp8,2047,0.027301333844661713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,float16,4095,0.06002666552861532
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,96,8,128,0,1,float16,fp8,4095,0.043578664461771645
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,float16,1,0.008778666456540426
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,fp8,1,0.00897066667675972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,float16,3,0.00867733359336853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,fp8,3,0.009109333157539368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,float16,7,0.008762666955590248
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,fp8,7,0.00922133338948091
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,float16,15,0.008703999842206636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,fp8,15,0.009205333267649015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,float16,31,0.009082666908701261
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,fp8,31,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,float16,63,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,fp8,63,0.01108266661564509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,float16,127,0.00897066667675972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,fp8,127,0.012693333129088083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,float16,255,0.020879998803138733
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,float16,2047,0.04155733436346054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,fp8,255,0.012853333105643591
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,float16,511,0.023258666197458904
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,fp8,511,0.027104000250498455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,float16,1023,0.029253333806991577
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,fp8,1023,0.03123733401298523
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,fp8,2047,0.040405333042144775
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,float16,4095,0.0643039991458257
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,fp8,4095,0.06029333174228668
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,float16,8191,0.11011200149854024
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,fp8,8191,0.09914132952690125
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,float16,1,0.008752000207702318
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,float16,16383,0.2035306692123413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,1,128,0,1,float16,fp8,16383,0.17499200503031412
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,fp8,1,0.009066666786869368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,float16,3,0.00878399983048439
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,fp8,3,0.009183999771873156
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,float16,7,0.00874133345981439
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,fp8,7,0.009008000294367472
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,float16,15,0.00878399983048439
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,fp8,15,0.00903466654320558
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,float16,31,0.009114666531483332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,fp8,31,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,float16,63,0.009109333157539368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,fp8,63,0.012506666282812754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,float16,127,0.00879466657837232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,fp8,127,0.011424000064531961
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,float16,255,0.01971199984351794
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,fp8,255,0.012896000097195307
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,float16,511,0.023242667317390442
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,fp8,511,0.025663999219735462
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,float16,1023,0.029135999580224354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,fp8,1023,0.03107733279466629
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,float16,2047,0.0417546679576238
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,fp8,8191,0.09904533624649048
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,fp8,2047,0.04154133299986521
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,float16,4095,0.0641599992911021
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,fp8,4095,0.060346667965253196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,float16,8191,0.11117866635322571
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,float16,16383,0.20286399126052856
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,float16,1,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,fp8,1,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,4,128,0,1,float16,fp8,16383,0.1750133236249288
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,float16,3,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,fp8,3,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,float16,7,0.010911999891201654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,fp8,7,0.01089599976936976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,float16,15,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,fp8,15,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,float16,31,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,fp8,31,0.010928000013033548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,float16,63,0.010512000570694605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,fp8,63,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,float16,127,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,fp8,127,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,float16,1023,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,float16,255,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,fp8,255,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,float16,511,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,fp8,511,0.011018666128317514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,fp8,1023,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,float16,2047,0.011168000598748526
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,fp8,2047,0.012719999998807907
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,float16,4095,0.015253332753976187
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,fp8,4095,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,float16,8191,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,fp8,8191,0.016250666230916977
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,float16,16383,0.019199999670187633
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,float16,1,0.012762666990359625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,96,8,128,0,1,float16,fp8,16383,0.017130666722853977
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,fp8,1,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,float16,3,0.012863999853531519
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,fp8,3,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,float16,7,0.012847999731699625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,fp8,7,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,float16,15,0.013125333935022354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,fp8,15,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,float16,31,0.014736000448465347
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,fp8,31,0.0170666662355264
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,float16,63,0.014970666418472925
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,fp8,63,0.01998399943113327
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,float16,127,0.01479999969402949
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,fp8,127,0.020928000410397846
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,float16,255,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,float16,2047,0.05343466500441233
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,fp8,255,0.021055998901526134
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,float16,511,0.02142400046189626
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,fp8,511,0.023306667804718018
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,float16,1023,0.03342399994532267
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,fp8,1023,0.03359466542800268
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,fp8,2047,0.051818668842315674
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,float16,4095,0.09473599990208943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,fp8,4095,0.08890133102734883
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,float16,8191,0.17692800362904867
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,fp8,8191,0.16375999649365744
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,float16,1,0.012869333227475485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,float16,16383,0.34137598673502606
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,fp8,1,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,1,128,0,1,float16,fp8,16383,0.31189332405726117
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,float16,3,0.012752000242471695
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,fp8,3,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,float16,7,0.012928000340859095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,fp8,7,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,float16,15,0.013056000073750814
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,fp8,15,0.015978666643301647
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,float16,31,0.014858666807413101
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,fp8,31,0.01727466657757759
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,float16,63,0.014671999961137772
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,fp8,63,0.019637333850065868
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,float16,127,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,fp8,127,0.020960000654061634
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,float16,255,0.017317333569129307
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,float16,2047,0.05345066885153452
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,fp8,255,0.020970667401949566
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,float16,511,0.02128000060717265
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,fp8,511,0.023354666928450268
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,float16,1023,0.032485333581765495
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,fp8,1023,0.033530667424201965
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,fp8,2047,0.05203733344872793
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,float16,4095,0.09488532940546672
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,fp8,4095,0.08890133102734883
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,float16,8191,0.1766080061594645
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,fp8,8191,0.16307199994723
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,float16,16383,0.34185067812601727
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,float16,1,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,4,128,0,1,float16,fp8,16383,0.31218665838241577
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,fp8,1,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,float16,3,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,fp8,3,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,float16,7,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,float16,63,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,fp8,7,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,float16,15,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,fp8,15,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,float16,31,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,fp8,31,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,fp8,63,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,float16,127,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,fp8,127,0.010618666807810465
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,float16,255,0.010629333555698395
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,fp8,255,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,float16,511,0.01101333275437355
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,fp8,511,0.011130666981140772
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,float16,4095,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,float16,1023,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,fp8,1023,0.010938666760921478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,float16,8191,0.018901333212852478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,float16,2047,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,fp8,2047,0.01479999969402949
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,fp8,4095,0.017024000485738117
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,fp8,8191,0.01714133347074191
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,float16,16383,0.023381332556406658
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,96,8,128,0,1,float16,fp8,16383,0.02107733239730199
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,128,0,1,float16,float16,1,0.06684266527493794
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,128,0,1,float16,fp8,1,0.05753066639105479
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,128,0,1,float16,float16,3,0.06852266689141591
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,128,0,1,float16,fp8,3,0.059802666306495667
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,128,0,1,float16,float16,7,0.07046400010585785
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,128,0,1,float16,fp8,7,0.06196266909440359
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,128,0,1,float16,float16,15,0.08656000097592671
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,128,0,1,float16,fp8,15,0.07627733548482259
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,128,0,1,float16,float16,31,0.08596266309420268
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,128,0,1,float16,fp8,31,0.07658666869004567
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,128,0,1,float16,float16,63,0.0867039958635966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,128,0,1,float16,fp8,63,0.07639466722806294
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,128,0,1,float16,float16,127,0.10116799672444661
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,128,0,1,float16,fp8,127,0.09306666254997253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,128,0,1,float16,float16,255,0.15239999691645303
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,128,0,1,float16,fp8,255,0.14179733395576477
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,128,0,1,float16,float16,511,0.2527199983596802
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,128,0,1,float16,fp8,511,0.24021865924199423
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,128,0,1,float16,float16,1023,0.4596159855524699
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,128,0,1,float16,float16,3,0.0688266654809316
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,128,0,1,float16,float16,1,0.06644799808661143
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,128,0,1,float16,fp8,1023,0.4352480173110962
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,128,0,1,float16,fp8,1,0.05808533231417338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,128,0,1,float16,fp8,3,0.05983999868233999
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,128,0,1,float16,fp8,2047,0.8263413111368815
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,1,128,0,1,float16,float16,2047,0.8696106274922689
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,128,0,1,float16,float16,7,0.0703359991312027
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,128,0,1,float16,float16,63,0.0867039958635966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,128,0,1,float16,fp8,7,0.06189866860707601
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,128,0,1,float16,float16,15,0.08559466401735942
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,128,0,1,float16,fp8,15,0.07665066421031952
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,128,0,1,float16,float16,31,0.08648533622423808
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,128,0,1,float16,fp8,31,0.07658666869004567
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,128,0,1,float16,fp8,63,0.07662400106589
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,128,0,1,float16,float16,127,0.10120532910029094
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,128,0,1,float16,fp8,127,0.09310932954152425
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,128,0,1,float16,float16,255,0.15237866838773093
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,128,0,1,float16,fp8,255,0.14190399646759033
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,128,0,1,float16,float16,511,0.2535093426704407
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,128,0,1,float16,fp8,511,0.24014933904012045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,128,0,1,float16,float16,1023,0.46768001715342206
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,128,0,1,float16,fp8,1023,0.4349386692047119
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,128,0,1,float16,float16,1,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,128,0,1,float16,fp8,1,0.01523200049996376
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,128,0,1,float16,float16,3,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,128,0,1,float16,float16,2047,0.889695962270101
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,128,0,1,float16,fp8,3,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,4,128,0,1,float16,fp8,2047,0.8410346508026123
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,128,0,1,float16,float16,7,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,128,0,1,float16,fp8,7,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,128,0,1,float16,float16,15,0.01718933383623759
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,128,0,1,float16,fp8,15,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,128,0,1,float16,float16,31,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,128,0,1,float16,fp8,31,0.016250666230916977
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,128,0,1,float16,float16,63,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,128,0,1,float16,fp8,63,0.016773333152135212
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,128,0,1,float16,fp8,511,0.019109333554903667
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,128,0,1,float16,float16,127,0.016789333273967106
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,128,0,1,float16,fp8,127,0.015365333606799444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,128,0,1,float16,float16,255,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,128,0,1,float16,fp8,255,0.01523200049996376
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,128,0,1,float16,float16,511,0.02205866575241089
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,128,0,1,float16,float16,1023,0.03651199986537298
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,128,0,1,float16,fp8,1023,0.027109332382678986
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,128,0,1,float16,float16,2047,0.060933331648508705
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,96,8,128,0,1,float16,fp8,2047,0.04418133199214935
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,float16,1,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,float16,15,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,fp8,1,0.01313599944114685
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,float16,3,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,fp8,3,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,float16,7,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,fp8,7,0.01481066644191742
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,fp8,15,0.01479999969402949
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,float16,31,0.019199999670187633
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,fp8,31,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,float16,63,0.01924266666173935
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,fp8,63,0.016255999604860943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,float16,127,0.020981334149837494
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,fp8,127,0.016837333639462788
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,float16,255,0.023077333966890972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,fp8,255,0.019109333554903667
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,float16,511,0.03142400085926056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,fp8,2047,0.07027733325958252
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,fp8,511,0.027109332382678986
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,float16,1023,0.04598399996757507
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,fp8,1023,0.041450666884581246
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,float16,8191,0.2630239923795064
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,fp8,8191,0.24488532543182373
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,float16,2047,0.07836266855398814
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,float16,4095,0.1400213340918223
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,fp8,4095,0.12957866986592612
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,float16,1,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,fp8,1,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,float16,16383,0.509119987487793
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,float16,3,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,1,128,0,1,float16,fp8,16383,0.47827200094858807
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,fp8,3,0.01498666654030482
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,float16,7,0.01730666682124138
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,fp8,7,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,float16,15,0.017290666699409485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,fp8,15,0.014767999450365702
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,float16,31,0.019333332777023315
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,fp8,31,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,float16,63,0.02083733429511388
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,fp8,63,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,float16,127,0.019253333409627277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,fp8,127,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,float16,255,0.023215999205907185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,fp8,255,0.019205333044131596
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,float16,511,0.0312266672650973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,fp8,511,0.02699200063943863
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,float16,1023,0.047775998711586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,fp8,1023,0.041519999504089355
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,float16,2047,0.07795199751853943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,fp8,2047,0.07030400137106578
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,float16,4095,0.14008532961209616
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,fp8,4095,0.12999467055002847
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,float16,8191,0.26307199398676556
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,fp8,8191,0.24624532461166382
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,float16,1,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,fp8,1,0.010965333630641302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,float16,16383,0.5194666783014933
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,float16,3,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,4,128,0,1,float16,fp8,16383,0.48372264703114826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,fp8,3,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,float16,7,0.011018666128317514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,fp8,7,0.010938666760921478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,float16,15,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,fp8,15,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,float16,31,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,fp8,31,0.011087999989589056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,fp8,255,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,float16,63,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,fp8,63,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,float16,127,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,fp8,127,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,float16,255,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,float16,511,0.011194666226704916
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,fp8,511,0.011786667009194693
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,float16,1023,0.012896000097195307
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,fp8,1023,0.011482667177915573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,float16,2047,0.016250666230916977
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,fp8,2047,0.014975999792416891
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,float16,4095,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,fp8,4095,0.01710933322707812
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,float16,8191,0.02163200080394745
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,fp8,8191,0.019280000279347103
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,float16,16383,0.039349332451820374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,96,8,128,0,1,float16,fp8,16383,0.026378666361172993
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,128,0,1,float16,float16,7,0.13587199648221335
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,128,0,1,float16,float16,1,0.1295253336429596
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,128,0,1,float16,fp8,1,0.10734933614730835
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,128,0,1,float16,fp8,15,0.14615466197331747
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,128,0,1,float16,float16,3,0.13362666964530945
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,128,0,1,float16,fp8,3,0.11157866319020589
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,128,0,1,float16,fp8,7,0.11724799871444702
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,128,0,1,float16,float16,15,0.16639467080434164
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,128,0,1,float16,float16,31,0.16644799709320068
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,128,0,1,float16,fp8,31,0.14627200365066528
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,128,0,1,float16,float16,63,0.16665599743525186
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,128,0,1,float16,fp8,63,0.14622933665911356
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,128,0,1,float16,float16,127,0.19744000832239786
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,128,0,1,float16,fp8,127,0.1790613333384196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,128,0,1,float16,float16,255,0.29783467451731366
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,128,0,1,float16,fp8,255,0.2760319908459981
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,128,0,1,float16,float16,511,0.4989173412322998
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,128,0,1,float16,fp8,511,0.4701546827952067
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,128,0,1,float16,float16,3,0.13372266292572021
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,128,0,1,float16,float16,1,0.12763200203577676
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,128,0,1,float16,fp8,1,0.10899200042088826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,128,0,1,float16,float16,1023,0.9082933266957601
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,128,0,1,float16,fp8,3,0.11148266990979512
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,1,128,0,1,float16,fp8,1023,0.857098658879598
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,128,0,1,float16,float16,7,0.1358506679534912
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,128,0,1,float16,fp8,7,0.11753599842389424
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,128,0,1,float16,float16,15,0.16641066471735635
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,128,0,1,float16,fp8,15,0.14596266547838846
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,128,0,1,float16,float16,31,0.1665173371632894
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,128,0,1,float16,fp8,31,0.1463093360265096
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,128,0,1,float16,float16,63,0.16684800386428833
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,128,0,1,float16,fp8,63,0.14636266231536865
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,128,0,1,float16,float16,127,0.19708265860875449
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,128,0,1,float16,fp8,127,0.17969600359598795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,128,0,1,float16,float16,255,0.2977653344472249
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,128,0,1,float16,fp8,255,0.27537065744400024
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,128,0,1,float16,float16,511,0.5104586680730184
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,128,0,1,float16,fp8,511,0.47017598152160645
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,128,0,1,float16,float16,1,0.023344000180562336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,128,0,1,float16,fp8,1,0.0229066660006841
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,128,0,1,float16,float16,7,0.02333866556485494
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,128,0,1,float16,float16,3,0.02311466634273529
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,128,0,1,float16,fp8,7,0.023018665611743927
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,128,0,1,float16,float16,1023,0.9276320139567057
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,128,0,1,float16,fp8,3,0.023215999205907185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,128,0,1,float16,fp8,31,0.02309333284695943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,4,128,0,1,float16,fp8,1023,0.8729759852091471
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,128,0,1,float16,float16,15,0.023226665953795116
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,128,0,1,float16,fp8,15,0.023061332603295643
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,128,0,1,float16,float16,31,0.023269332945346832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,128,0,1,float16,float16,63,0.023445333043734234
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,128,0,1,float16,fp8,63,0.023306667804718018
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,128,0,1,float16,float16,127,0.02317333221435547
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,128,0,1,float16,fp8,127,0.02316266546646754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,128,0,1,float16,float16,255,0.023258666197458904
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,128,0,1,float16,fp8,255,0.023007998863856
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,128,0,1,float16,float16,511,0.03745600084463755
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,128,0,1,float16,fp8,511,0.029487999776999157
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,128,0,1,float16,float16,1023,0.058090666929880776
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,96,8,128,0,1,float16,fp8,1023,0.04333333174387614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,1,128,0,1,float16,float16,1,0.25082133213679
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,1,128,0,1,float16,fp8,1,0.2091040015220642
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,1,128,0,1,float16,float16,3,0.26099199056625366
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,1,128,0,1,float16,float16,15,0.3262293338775635
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,1,128,0,1,float16,fp8,3,0.21624533335367838
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,1,128,0,1,float16,float16,7,0.2650826573371887
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,1,128,0,1,float16,fp8,7,0.22819199164708456
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,1,128,0,1,float16,fp8,15,0.2851840058962504
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,1,128,0,1,float16,float16,31,0.32631999254226685
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,1,128,0,1,float16,float16,127,0.3874826828638713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,1,128,0,1,float16,fp8,31,0.28601600726445514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,1,128,0,1,float16,float16,63,0.3282666603724162
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,1,128,0,1,float16,fp8,63,0.2873013416926066
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,1,128,0,1,float16,fp8,127,0.35254931449890137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,4,128,0,1,float16,float16,1,0.25037866830825806
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,1,128,0,1,float16,float16,255,0.588709314664205
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,1,128,0,1,float16,fp8,255,0.543557325998942
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,4,128,0,1,float16,fp8,1,0.20973867177963257
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,4,128,0,1,float16,float16,3,0.2608480056126912
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,4,128,0,1,float16,fp8,3,0.21785066525141397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,4,128,0,1,float16,float16,7,0.26714134216308594
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,4,128,0,1,float16,fp8,7,0.22824533780415854
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,4,128,0,1,float16,float16,15,0.3265226682027181
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,4,128,0,1,float16,fp8,15,0.2860479950904846
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,4,128,0,1,float16,float16,31,0.32650667428970337
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,4,128,0,1,float16,fp8,31,0.2855306665102641
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,4,128,0,1,float16,float16,63,0.3285013238588969
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,4,128,0,1,float16,fp8,63,0.2876960039138794
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,4,128,0,1,float16,float16,127,0.3875466585159302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,8,128,0,1,float16,fp8,1,0.03526400029659271
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,4,128,0,1,float16,fp8,127,0.3525013526280721
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,8,128,0,1,float16,float16,1,0.03761066744724909
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,4,128,0,1,float16,float16,255,0.6110933224360148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,4,128,0,1,float16,fp8,255,0.5451680024464926
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,8,128,0,1,float16,float16,3,0.03751466671625773
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,8,128,0,1,float16,fp8,15,0.03524799893299738
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,8,128,0,1,float16,fp8,3,0.035301332672437034
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,8,128,0,1,float16,float16,7,0.03756266583998998
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,8,128,0,1,float16,fp8,7,0.03526933242877325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,8,128,0,1,float16,float16,15,0.03737066686153412
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,8,128,0,1,float16,float16,31,0.03740799923737844
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,8,128,0,1,float16,fp8,31,0.03563733398914337
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,8,128,0,1,float16,float16,63,0.03750933210055033
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,8,128,0,1,float16,fp8,255,0.03555200000603994
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,8,128,0,1,float16,fp8,63,0.035258665680885315
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,8,128,0,1,float16,float16,127,0.03757333258787791
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,8,128,0,1,float16,fp8,127,0.035360001027584076
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,96,8,128,0,1,float16,float16,255,0.040074666341145836
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,1,128,0,1,float16,float16,1,0.49434133370717365
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,1,128,0,1,float16,fp8,1,0.4105600118637085
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,1,128,0,1,float16,float16,3,0.5176586707433065
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,1,128,0,1,float16,fp8,3,0.42663999398549396
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,1,128,0,1,float16,float16,7,0.5267680088678995
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,1,128,0,1,float16,fp8,7,0.4510720173517863
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,1,128,0,1,float16,float16,15,0.6459360122680664
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,1,128,0,1,float16,fp8,15,0.5641813278198242
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,1,128,0,1,float16,float16,31,0.646021326382955
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,1,128,0,1,float16,fp8,31,0.5663573344548544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,1,128,0,1,float16,float16,63,0.6518719991048177
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,1,128,0,1,float16,fp8,63,0.5687466859817505
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,1,128,0,1,float16,float16,127,0.7686506907145182
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,4,128,0,1,float16,fp8,1,0.4127466678619385
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,4,128,0,1,float16,float16,1,0.4941279888153076
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,1,128,0,1,float16,fp8,127,0.6986560026804606
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,4,128,0,1,float16,float16,3,0.5171840190887451
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,4,128,0,1,float16,fp8,7,0.45104531447092694
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,4,128,0,1,float16,fp8,3,0.4289226531982422
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,4,128,0,1,float16,float16,7,0.5292906761169434
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,4,128,0,1,float16,fp8,15,0.5643253326416016
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,4,128,0,1,float16,float16,15,0.646016001701355
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,4,128,0,1,float16,float16,31,0.6475893259048462
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,4,128,0,1,float16,fp8,31,0.5663146575291952
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,4,128,0,1,float16,float16,63,0.651807983716329
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,4,128,0,1,float16,fp8,63,0.5696426630020142
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,8,128,0,1,float16,float16,1,0.06620799998442332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,8,128,0,1,float16,fp8,1,0.06121066709359487
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,8,128,0,1,float16,float16,3,0.06701866785685222
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,4,128,0,1,float16,fp8,127,0.7003786563873291
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,4,128,0,1,float16,float16,127,0.8060373465220133
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,8,128,0,1,float16,fp8,3,0.060234665870666504
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,8,128,0,1,float16,float16,7,0.06698666512966156
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,8,128,0,1,float16,fp8,7,0.06152533491452535
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,8,128,0,1,float16,float16,15,0.06649066507816315
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,8,128,0,1,float16,fp8,15,0.06191466748714447
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,8,128,0,1,float16,float16,31,0.06606400012969971
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,8,128,0,1,float16,fp8,31,0.06161599854628245
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,float16,1,0.01303999995191892
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,8,128,0,1,float16,float16,63,0.06604800124963124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,8,128,0,1,float16,fp8,63,0.06192533175150553
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,8,128,0,1,float16,float16,127,0.06788800160090129
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,fp8,1,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,96,8,128,0,1,float16,fp8,127,0.06195733447869619
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,float16,3,0.01461333284775416
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,fp8,3,0.012944000462690989
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,float16,7,0.013194666554530462
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,fp8,7,0.013034666577974955
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,fp8,63,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,float16,15,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,fp8,15,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,float16,31,0.015354666858911514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,fp8,31,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,float16,63,0.01706133286158244
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,float16,127,0.020175999651352566
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,fp8,127,0.017071999609470367
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,float16,255,0.02720000098148982
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,fp8,255,0.02535466601451238
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,float16,511,0.0422986646493276
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,fp8,511,0.041509332756201424
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,float16,1023,0.07236266632874806
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,fp8,1023,0.0711359977722168
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,float16,2047,0.13371200362841287
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,fp8,2047,0.12957866986592612
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,float16,4095,0.25679999589920044
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,fp8,4095,0.2485439976056417
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,float16,8191,0.5029866695404053
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,fp8,8191,0.48862401644388836
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,float16,7,0.013237333546082178
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,float16,1,0.01303999995191892
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,fp8,1,0.01314666618903478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,float16,3,0.013189333180586496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,fp8,15,0.01492799942692121
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,fp8,3,0.013221333424250284
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,float16,16383,1.0310293038686116
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,fp8,7,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,1,128,0,1,float16,fp8,16383,1.1003039677937825
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,float16,15,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,float16,31,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,fp8,31,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,float16,63,0.016469333320856094
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,float16,511,0.04167466859022776
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,fp8,63,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,float16,1023,0.07261866827805837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,float16,127,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,float16,2047,0.1336373289426168
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,fp8,127,0.017237332959969837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,float16,255,0.02719466636578242
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,fp8,255,0.025445332129796345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,fp8,511,0.04171733558177948
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,fp8,1023,0.07051200171311696
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,fp8,2047,0.13025066256523132
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,float16,4095,0.2569813330968221
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,fp8,4095,0.24992533524831137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,fp8,1,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,float16,8191,0.5059573252995809
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,fp8,8191,0.49034667015075684
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,float16,1,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,fp8,7,0.010901333143313726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,float16,3,0.010645333677530289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,fp8,3,0.01101333275437355
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,float16,7,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,float16,16383,1.1536959807078044
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,float16,15,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,fp8,15,0.010656000425418219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,float16,127,0.01101333275437355
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,4,128,0,1,float16,fp8,16383,1.146666685740153
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,float16,31,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,fp8,31,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,float16,63,0.01098666712641716
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,fp8,63,0.011120000233252844
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,fp8,127,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,float16,255,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,float16,2047,0.018944000204404194
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,fp8,255,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,float16,511,0.012938667088747025
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,fp8,4095,0.021045332153638203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,fp8,511,0.01109333336353302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,float16,1023,0.013573333621025085
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,fp8,1023,0.012784000486135483
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,fp8,2047,0.017050666113694508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,float16,4095,0.021040000021457672
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,float16,8191,0.039450667798519135
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,fp8,8191,0.027295999228954315
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,float16,16383,0.059952000776926674
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,96,8,128,0,1,float16,fp8,16383,0.042447999119758606
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,1,128,0,1,float16,float16,1,0.9840053717295328
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,1,128,0,1,float16,fp8,1,0.8131999969482422
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,1,128,0,1,float16,float16,3,1.0311520099639893
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,1,128,0,1,float16,fp8,3,0.8482293287913004
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,1,128,0,1,float16,float16,7,1.048799991607666
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,1,128,0,1,float16,fp8,7,0.8939519723256429
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,1,128,0,1,float16,float16,15,1.2852319876352947
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,1,128,0,1,float16,fp8,15,1.1228960355122883
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,1,128,0,1,float16,float16,31,1.2875786622365315
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,1,128,0,1,float16,fp8,31,1.1264586448669434
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,1,128,0,1,float16,float16,63,1.3006933530171711
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,4,128,0,1,float16,float16,1,0.9812266826629639
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,1,128,0,1,float16,fp8,63,1.132208029429118
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,4,128,0,1,float16,fp8,1,0.8195306460062662
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,4,128,0,1,float16,float16,3,1.032480001449585
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,4,128,0,1,float16,fp8,3,0.8505067030588785
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,4,128,0,1,float16,float16,7,1.0542133649190266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,4,128,0,1,float16,fp8,7,0.8953920205434164
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,4,128,0,1,float16,fp8,15,1.1234933535257976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,4,128,0,1,float16,float16,15,1.2961973349253337
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,4,128,0,1,float16,float16,31,1.3178719679514568
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,8,128,0,1,float16,float16,1,0.12173333764076233
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,8,128,0,1,float16,fp8,1,0.1129813293615977
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,4,128,0,1,float16,fp8,31,1.136240005493164
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,8,128,0,1,float16,float16,3,0.12138666709264119
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,4,128,0,1,float16,float16,63,1.3436427116394043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,4,128,0,1,float16,fp8,63,1.1746293703715007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,8,128,0,1,float16,fp8,3,0.11356799801190694
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,8,128,0,1,float16,float16,7,0.12177600463231404
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,8,128,0,1,float16,fp8,7,0.11338133613268535
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,8,128,0,1,float16,float16,15,0.12148800492286682
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,8,128,0,1,float16,fp8,15,0.1132533351580302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,8,128,0,1,float16,float16,31,0.12128000458081563
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,8,128,0,1,float16,float16,63,0.12268267075220744
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,8,128,0,1,float16,fp8,31,0.11328533291816711
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,96,8,128,0,1,float16,fp8,63,0.11331733067830403
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,1,128,0,1,float16,fp8,1,1.7320693333943684
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,1,128,0,1,float16,float16,1,2.0958080291748047
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,1,128,0,1,float16,fp8,3,1.7706400553385417
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,1,128,0,1,float16,float16,3,2.1655786832173667
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,1,128,0,1,float16,fp8,7,1.8542826970418294
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,1,128,0,1,float16,float16,7,2.197450637817383
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,1,128,0,1,float16,fp8,15,2.2968640327453613
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,1,128,0,1,float16,float16,15,2.6276747385660806
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,4,128,0,1,float16,fp8,1,1.7964159647623699
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,1,128,0,1,float16,fp8,31,2.307546615600586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,1,128,0,1,float16,float16,31,2.630880037943522
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,4,128,0,1,float16,float16,1,2.1749280293782554
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,4,128,0,1,float16,fp8,3,1.8241492907206218
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,4,128,0,1,float16,float16,3,2.222485383351644
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,4,128,0,1,float16,fp8,7,1.900261402130127
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,4,128,0,1,float16,float16,7,2.258421262105306
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,8,128,0,1,float16,float16,1,0.2302346626917521
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,8,128,0,1,float16,fp8,1,0.2137226661046346
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,8,128,0,1,float16,float16,3,0.23009065786997476
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,8,128,0,1,float16,fp8,3,0.21353065967559814
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,4,128,0,1,float16,fp8,15,2.320277372996012
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,4,128,0,1,float16,float16,15,2.6558400789896646
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,4,128,0,1,float16,fp8,31,2.3343733151753745
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,8,128,0,1,float16,float16,7,0.22988800207773843
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,8,128,0,1,float16,fp8,7,0.21359467506408691
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,4,128,0,1,float16,float16,31,2.6609867413838706
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,float16,1,0.021349333226680756
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,8,128,0,1,float16,float16,15,0.22961600621541342
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,fp8,1,0.019237333287795384
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,float16,3,0.021375998854637146
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,8,128,0,1,float16,fp8,15,0.21342400709788004
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,float16,7,0.02275199939807256
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,8,128,0,1,float16,fp8,31,0.21378666162490845
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,fp8,3,0.019061333189407986
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,fp8,7,0.01931200052301089
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,float16,15,0.025381334125995636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,fp8,15,0.02332266668478648
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,float16,127,0.030016000072161358
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,96,8,128,0,1,float16,float16,31,0.22988800207773843
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,float16,31,0.02550933261712392
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,fp8,31,0.0233599990606308
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,float16,63,0.025834667185942333
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,fp8,63,0.023743999501069386
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,fp8,127,0.029189333319664
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,float16,255,0.04368533194065094
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,fp8,255,0.04165866722663244
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,float16,511,0.07222933570543925
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,fp8,2047,0.2279626727104187
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,fp8,511,0.06854400038719177
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,float16,1023,0.12570666273434958
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,fp8,1023,0.12164266904195149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,float16,2047,0.23640533288319907
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,float16,4095,0.4562079906463623
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,fp8,4095,0.4389653205871582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,128,0,1,float16,float16,1,0.02142400046189626
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,128,0,1,float16,fp8,1,0.01918399954835574
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,128,0,1,float16,float16,3,0.021221332252025604
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,128,0,1,float16,float16,15,0.025583999852339428
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,float16,8191,0.8973226547241211
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,128,0,1,float16,fp8,3,0.01932799940307935
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,1,128,0,1,float16,fp8,8191,0.8659573396046957
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,128,0,1,float16,float16,7,0.02145066608985265
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,128,0,1,float16,fp8,7,0.019248000035683315
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,128,0,1,float16,fp8,15,0.023306667804718018
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,128,0,1,float16,float16,31,0.025439999997615814
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,128,0,1,float16,fp8,31,0.02346666653951009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,128,0,1,float16,float16,63,0.025407999753952026
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,128,0,1,float16,fp8,63,0.023445333043734234
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,128,0,1,float16,float16,127,0.03161599983771642
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,128,0,1,float16,fp8,127,0.027797333896160126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,128,0,1,float16,float16,255,0.04391466577847799
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,128,0,1,float16,fp8,255,0.04185600082079569
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,128,0,1,float16,float16,511,0.07221333185831706
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,128,0,1,float16,fp8,511,0.06826133529345195
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,128,0,1,float16,float16,1023,0.12578133742014566
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,128,0,1,float16,fp8,1023,0.12176000078519185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,128,0,1,float16,float16,2047,0.23613866170247397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,128,0,1,float16,fp8,2047,0.22787733872731528
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,128,0,1,float16,float16,4095,0.4607733488082886
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,128,0,1,float16,fp8,4095,0.4405226707458496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,float16,1,0.011178666104873022
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,fp8,1,0.011168000598748526
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,float16,3,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,128,0,1,float16,float16,8191,0.9260640144348145
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,fp8,3,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,4,128,0,1,float16,fp8,8191,0.8718773523966471
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,float16,7,0.010949333508809408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,fp8,7,0.011424000064531961
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,float16,15,0.011114666859308878
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,fp8,15,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,float16,127,0.011237333218256632
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,float16,31,0.010885333021481832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,fp8,31,0.01089599976936976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,float16,63,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,fp8,63,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,float16,1023,0.01481066644191742
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,fp8,127,0.010949333508809408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,float16,255,0.01110400011142095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,fp8,255,0.011514666179815928
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,float16,511,0.012965332716703415
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,fp8,511,0.01097600037852923
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,fp8,1023,0.012794667234023413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,float16,2047,0.02109866589307785
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,fp8,2047,0.019248000035683315
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,float16,4095,0.03745066622893015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,fp8,4095,0.025407999753952026
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,float16,8191,0.05945600072542826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,float16,3,0.02720533311367035
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,96,8,128,0,1,float16,fp8,8191,0.04278933505217234
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,float16,1,0.027119999130566914
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,fp8,3,0.023397333920001984
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,float16,1,0.027386667827765148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,fp8,1,0.023525332411130268
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,fp8,1,0.02325333406527837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,float16,3,0.027488000690937042
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,fp8,3,0.023413332800070446
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,float16,7,0.02740799884001414
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,float16,7,0.02752000093460083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,fp8,7,0.02349333216746648
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,fp8,7,0.023247999449570973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,float16,15,0.029157333076000214
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,float16,15,0.02942933390537898
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,fp8,15,0.025066666305065155
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,fp8,15,0.025061334172884624
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,float16,31,0.033258666594823204
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,float16,31,0.03156800071398417
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,fp8,63,0.029487999776999157
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,fp8,31,0.029370665550231934
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,fp8,31,0.02918400118748347
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,float16,63,0.033333333830038704
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,fp8,127,0.02958933264017105
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,float16,63,0.033258666594823204
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,float16,255,0.03788800040880839
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,fp8,63,0.029557332396507263
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,float16,127,0.03347733368476232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,float16,127,0.03324799984693527
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,fp8,127,0.02958933264017105
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,float16,255,0.033471999069054924
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,fp8,255,0.02957333376010259
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,fp8,255,0.035317334036032356
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,float16,511,0.03328000009059906
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,float16,511,0.0558186670144399
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,fp8,511,0.029616000751654308
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,fp8,511,0.051957334081331887
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,float16,1023,0.03339199970165888
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,float16,1023,0.08886933326721191
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,fp8,1023,0.029552000264326733
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,fp8,1023,0.08678399523099263
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,float16,2047,0.033359999457995095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,float16,2047,0.1586666703224182
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,fp8,2047,0.02956799914439519
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,fp8,2047,0.15451733271280924
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,float16,4095,0.033701332906881966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,fp8,4095,0.029391999046007793
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,fp8,8191,0.029258665939172108
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,float16,4095,0.297760009765625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,float16,1,0.027066667874654133
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,fp8,1,0.02330133318901062
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,fp8,4095,0.2919146617253621
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,fp8,8191,0.5683146715164185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,64,128,1,float16,float16,8191,0.033189333975315094
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,fp8,3,0.02311466634273529
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,float16,1,0.02736533433198929
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,64,0,1,float16,float16,8191,0.5757493178049723
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,fp8,1,0.02332266668478648
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,float16,3,0.027280000348885853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,float16,3,0.02734400083621343
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,fp8,7,0.02332799881696701
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,fp8,3,0.023333333432674408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,float16,7,0.02775466690460841
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,float16,7,0.02757866680622101
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,fp8,7,0.024005333582560223
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,float16,15,0.02743999908367793
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,float16,15,0.029146666328112285
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,fp8,31,0.029258665939172108
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,fp8,15,0.025098666548728943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,fp8,15,0.02348800003528595
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,float16,31,0.03345600018898646
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,float16,31,0.033226666351159416
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,fp8,31,0.029285334050655365
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,float16,63,0.0334346666932106
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,float16,63,0.033402666449546814
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,fp8,63,0.02942399928967158
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,fp8,127,0.029706666866938274
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,fp8,63,0.02958933264017105
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,float16,127,0.033488000432650246
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,float16,127,0.03365866591533025
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,fp8,255,0.03534399966398875
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,float16,511,0.033301333586374916
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,fp8,127,0.02957333376010259
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,float16,255,0.03359466542800268
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,float16,255,0.039146666725476585
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,float16,1023,0.03363200028737386
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,fp8,255,0.029535998900731403
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,float16,511,0.05584533512592316
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,fp8,511,0.02945599953333537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,fp8,511,0.051701332132021584
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,float16,1023,0.09076266487439473
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,fp8,1023,0.029472000896930695
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,fp8,1023,0.086709330479304
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,float16,2047,0.03365333378314972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,fp8,2047,0.029530666768550873
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,float16,2047,0.15851199626922607
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,fp8,2047,0.15529599785804749
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,float16,4095,0.033285332222779594
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,float16,4095,0.29758399724960327
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,fp8,4095,0.02939733366171519
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,float16,8191,0.033770665526390076
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,fp8,4095,0.2918986678123474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,64,128,1,float16,fp8,8191,0.029466666281223297
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,float16,1,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,float16,8191,0.5820480187733968
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,float16,1,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,fp8,1,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,64,0,1,float16,fp8,8191,0.5687679847081503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,fp8,1,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,float16,3,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,float16,3,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,fp8,3,0.01091733326514562
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,fp8,3,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,float16,7,0.011061333119869232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,float16,7,0.010890666395425797
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,fp8,7,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,fp8,7,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,float16,15,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,float16,15,0.010954666882753372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,fp8,15,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,fp8,15,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,float16,31,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,float16,31,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,fp8,31,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,float16,127,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,fp8,31,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,float16,63,0.011087999989589056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,float16,63,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,fp8,63,0.010922666639089584
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,fp8,63,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,float16,127,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,fp8,127,0.010901333143313726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,fp8,127,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,float16,255,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,float16,255,0.010901333143313726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,fp8,255,0.01091733326514562
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,fp8,255,0.010650667051474253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,float16,511,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,float16,511,0.011125333607196808
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,fp8,511,0.011136000355084738
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,fp8,511,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,float16,1023,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,float16,1023,0.01313599944114685
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,fp8,2047,0.01893866683046023
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,fp8,1023,0.011098666737476984
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,fp8,1023,0.013050666699806849
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,float16,2047,0.013088000317414602
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,float16,2047,0.01939733326435089
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,fp8,2047,0.013568000247081121
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,float16,4095,0.01309866706530253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,float16,4095,0.026149332523345947
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,fp8,4095,0.01302933320403099
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,fp8,4095,0.025040000677108765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,float16,8191,0.012666666259368261
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,float16,8191,0.04168533285458883
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,64,128,1,float16,fp8,8191,0.012821332861979803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,64,0,1,float16,fp8,8191,0.03565866748491923
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,float16,1,0.012682666381200155
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,float16,1,0.012191999703645706
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,fp8,1,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,fp8,1,0.01268799975514412
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,float16,3,0.011109333485364914
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,float16,3,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,fp8,3,0.011173332730929056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,fp8,3,0.012853333105643591
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,float16,7,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,float16,7,0.01109333336353302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,fp8,7,0.012650666137536367
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,fp8,7,0.0124746672809124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,float16,15,0.01108266661564509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,float16,15,0.011141333729028702
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,fp8,15,0.012741333494583765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,fp8,15,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,float16,31,0.012746666868527731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,float16,31,0.012666666259368261
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,fp8,31,0.012917333592971167
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,fp8,31,0.011098666737476984
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,float16,63,0.01110400011142095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,float16,63,0.012741333494583765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,fp8,63,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,fp8,63,0.012821332861979803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,float16,127,0.012879999975363413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,float16,127,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,fp8,127,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,fp8,127,0.011130666981140772
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,float16,511,0.013663999736309052
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,float16,255,0.011887999872366587
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,float16,255,0.01109333336353302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,fp8,255,0.011152000476916632
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,fp8,255,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,float16,511,0.012975999464591345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,fp8,511,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,fp8,511,0.012885333349307379
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,float16,2047,0.025120000044504803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,float16,1023,0.012970666090647379
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,float16,1023,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,fp8,1023,0.012037333101034164
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,fp8,1023,0.014943999548753103
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,float16,2047,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,fp8,2047,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,fp8,2047,0.02310933421055476
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,float16,4095,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,float16,4095,0.03966933240493139
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,fp8,4095,0.014901333798964819
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,fp8,4095,0.03199466566244761
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,float16,8191,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,float16,8191,0.061994666854540505
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,64,128,1,float16,fp8,8191,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,float16,3,0.00873066671192646
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,float16,1,0.0086666668454806
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,64,0,1,float16,fp8,8191,0.05435733497142792
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,float16,1,0.008778666456540426
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,fp8,1,0.008943999807039896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,float16,7,0.008746666833758354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,fp8,1,0.009119999905427298
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,fp8,7,0.008954666554927826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,float16,3,0.00860799973209699
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,fp8,3,0.008992000172535578
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,fp8,3,0.009082666908701261
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,float16,7,0.008885333314538002
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,fp8,7,0.00901333304742972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,float16,15,0.008656000097592672
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,float16,15,0.008639999975760778
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,fp8,15,0.008954666554927826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,fp8,15,0.009002666920423508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,float16,31,0.00891733355820179
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,float16,31,0.00867733359336853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,fp8,31,0.008858666444818178
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,fp8,31,0.009077333534757296
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,float16,63,0.009002666920423508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,float16,63,0.00903466654320558
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,fp8,63,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,fp8,63,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,float16,127,0.008976000050703684
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,float16,127,0.009082666908701261
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,fp8,127,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,fp8,127,0.012890666723251343
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,float16,255,0.009114666531483332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,float16,255,0.008826666822036108
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,fp8,255,0.012655999511480331
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,fp8,255,0.012709333250919977
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,float16,511,0.013050666699806849
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,fp8,1023,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,float16,511,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,fp8,511,0.011136000355084738
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,fp8,511,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,fp8,2047,0.021402666966120403
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,float16,1023,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,float16,1023,0.021183999876181286
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,fp8,1023,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,float16,2047,0.01889066646496455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,float16,2047,0.025087999800841015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,fp8,2047,0.029146666328112285
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,float16,8191,0.04836800197760264
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,fp8,8191,0.021253332495689392
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,float16,4095,0.017685333887736004
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,float16,4095,0.03336533407370249
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,fp8,4095,0.021386665602525074
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,fp8,4095,0.035818666219711304
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,float16,8191,0.017231999586025875
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,fp8,8191,0.04804799954096476
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,float16,16383,0.019679999599854153
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,float16,16383,0.07850666840871175
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,fp8,16383,0.02128533273935318
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,float16,32767,0.018976000448067982
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,fp8,16383,0.07454399764537811
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,float16,32767,0.1405119995276133
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,float16,1,0.009119999905427298
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,64,128,1,float16,fp8,32767,0.021359999974568684
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,float16,1,0.008714666590094566
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,fp8,1,0.009509333098928133
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,64,0,1,float16,fp8,32767,0.12550933162371317
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,fp8,1,0.00901333304742972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,float16,3,0.00871999996403853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,float16,3,0.008757333581646284
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,fp8,3,0.009109333157539368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,fp8,3,0.009082666908701261
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,float16,7,0.00877333308259646
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,float16,7,0.008842666943868002
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,fp8,7,0.009093333035707474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,fp8,7,0.00921066664159298
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,float16,15,0.008746666833758354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,float16,15,0.008778666456540426
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,fp8,15,0.00914666677514712
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,fp8,15,0.009322666873534521
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,float16,31,0.008997333546479544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,float16,31,0.008757333581646284
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,fp8,31,0.010415999839703241
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,fp8,31,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,float16,63,0.008965333302815756
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,float16,63,0.009082666908701261
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,fp8,63,0.011071999867757162
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,fp8,63,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,float16,127,0.00892800030608972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,float16,127,0.008789333204428354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,fp8,127,0.012901333471139273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,fp8,127,0.012815999488035837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,float16,255,0.008976000050703684
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,float16,255,0.008943999807039896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,fp8,255,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,fp8,255,0.012810666114091873
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,float16,511,0.013056000073750814
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,float16,511,0.013157332936922709
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,fp8,511,0.012815999488035837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,fp8,511,0.012853333105643591
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,float16,2047,0.025370667378107708
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,float16,1023,0.01907733331123988
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,fp8,2047,0.02924799919128418
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,float16,1023,0.02128000060717265
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,fp8,1023,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,fp8,1023,0.01701333373785019
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,float16,2047,0.019002666076024372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,float16,8191,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,fp8,2047,0.02141333371400833
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,float16,4095,0.017290666699409485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,float16,4095,0.03323733309904734
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,fp8,4095,0.021114667256673176
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,fp8,4095,0.035455999275048576
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,float16,8191,0.048656001687049866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,fp8,8191,0.02130666623512904
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,fp8,8191,0.048026666045188904
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,float16,16383,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,float16,16383,0.07860266665617625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,fp8,16383,0.021194666624069214
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,fp8,16383,0.07479466497898102
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,float16,32767,0.017375999440749485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,float16,32767,0.14056000113487244
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,float16,1,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,float16,3,0.009082666908701261
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,float16,3,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,64,128,1,float16,fp8,32767,0.021327999730904896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,float16,1,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,64,0,1,float16,fp8,32767,0.1258240044116974
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,fp8,1,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,fp8,1,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,fp8,3,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,fp8,3,0.009743999689817429
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,float16,7,0.009888000165422758
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,float16,7,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,fp8,7,0.010650667051474253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,fp8,7,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,float16,15,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,float16,15,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,fp8,15,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,fp8,15,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,float16,31,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,float16,31,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,fp8,31,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,fp8,31,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,float16,63,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,float16,63,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,fp8,63,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,fp8,63,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,float16,127,0.009061333412925402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,float16,127,0.00903466654320558
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,fp8,127,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,fp8,127,0.010949333508809408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,float16,255,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,float16,255,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,fp8,255,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,fp8,255,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,float16,511,0.008922666932145754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,float16,511,0.010965333630641302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,fp8,511,0.010629333555698395
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,fp8,511,0.011071999867757162
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,float16,1023,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,float16,1023,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,fp8,2047,0.010928000013033548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,fp8,1023,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,fp8,1023,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,float16,2047,0.010549332946538925
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,float16,2047,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,fp8,2047,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,float16,8191,0.009994666402538618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,float16,4095,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,float16,4095,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,fp8,4095,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,fp8,4095,0.011152000476916632
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,float16,8191,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,fp8,8191,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,fp8,8191,0.014912000546852747
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,float16,16383,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,float16,16383,0.01848000039656957
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,fp8,16383,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,fp8,16383,0.01752000053723653
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,float16,32767,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,float16,32767,0.019205333044131596
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,64,128,1,float16,fp8,32767,0.010591999938090643
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,float16,1,0.009029333169261614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,float16,1,0.009642666826645533
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,64,0,1,float16,fp8,32767,0.019013332823912304
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,fp8,1,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,fp8,1,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,float16,3,0.009413333609700203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,float16,3,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,fp8,3,0.00897066667675972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,fp8,3,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,float16,7,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,float16,7,0.00961599995692571
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,fp8,7,0.01098666712641716
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,fp8,7,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,float16,15,0.009408000235756239
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,float16,15,0.00890666681031386
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,fp8,15,0.009109333157539368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,fp8,15,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,float16,31,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,float16,31,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,fp8,31,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,fp8,31,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,float16,63,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,float16,63,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,fp8,63,0.008896000062425932
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,fp8,63,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,float16,127,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,float16,127,0.008992000172535578
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,fp8,127,0.009712000067035357
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,fp8,127,0.01098666712641716
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,float16,255,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,float16,255,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,fp8,255,0.009136000027259191
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,fp8,255,0.010954666882753372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,float16,511,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,float16,511,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,fp8,1023,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,float16,2047,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,fp8,511,0.009765333185593287
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,fp8,2047,0.010890666395425797
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,fp8,511,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,float16,1023,0.010890666395425797
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,float16,1023,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,fp8,1023,0.009322666873534521
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,float16,2047,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,fp8,2047,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,float16,8191,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,float16,4095,0.009178666397929192
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,float16,4095,0.01310933381319046
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,fp8,4095,0.01108266661564509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,fp8,4095,0.014554666976133982
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,float16,8191,0.010922666639089584
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,fp8,8191,0.010965333630641302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,fp8,8191,0.015034666905800501
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,float16,16383,0.008992000172535578
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,float16,16383,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,fp8,16383,0.009050666665037474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,float16,1,0.009008000294367472
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,fp8,16383,0.01714666684468587
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,float16,32767,0.020917333662509918
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,float16,32767,0.011551999797423681
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,64,128,1,float16,fp8,32767,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,64,0,1,float16,fp8,32767,0.019317333896954853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,float16,1,0.008762666955590248
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,fp8,3,0.009365333244204521
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,fp8,1,0.009173333023985228
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,fp8,1,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,float16,3,0.00898133342464765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,float16,3,0.008874666566650072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,fp8,3,0.009152000149091085
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,float16,7,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,float16,7,0.00873066671192646
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,fp8,7,0.009290666629870733
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,fp8,7,0.010037333394090334
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,float16,31,0.008885333314538002
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,float16,15,0.008954666554927826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,float16,15,0.008896000062425932
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,fp8,15,0.009930666536092758
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,fp8,15,0.009050666665037474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,float16,31,0.008762666955590248
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,fp8,31,0.010586666564146677
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,fp8,31,0.009061333412925402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,float16,63,0.009002666920423508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,float16,63,0.008933333059151968
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,fp8,63,0.010890666395425797
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,fp8,63,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,float16,127,0.008896000062425932
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,float16,127,0.00890666681031386
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,fp8,127,0.013082666943470636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,fp8,127,0.012837332983811697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,float16,511,0.01926400015751521
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,float16,255,0.009066666786869368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,float16,255,0.009098666409651438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,fp8,255,0.012858666479587555
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,fp8,255,0.012794667234023413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,fp8,1023,0.023445333043734234
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,fp8,1023,0.027327999472618103
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,float16,511,0.02102400114138921
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,fp8,511,0.013050666699806849
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,fp8,2047,0.023423999547958374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,fp8,2047,0.03148799886306127
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,fp8,511,0.012853333105643591
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,float16,1023,0.019146667172511418
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,float16,1023,0.023344000180562336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,float16,2047,0.019258666783571243
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,float16,2047,0.030581332743167877
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,float16,8191,0.06435200075308482
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,float16,4095,0.019109333554903667
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,fp8,8191,0.02313599983851115
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,float16,16383,0.0189280000825723
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,float16,4095,0.041706666350364685
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,fp8,4095,0.023050665855407715
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,fp8,4095,0.041536000867684685
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,float16,8191,0.01923199991385142
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,fp8,8191,0.060271998246510826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,float16,16383,0.1112000048160553
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,fp8,16383,0.023370665808518726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,fp8,16383,0.09894399841626485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,float16,32767,0.01916266605257988
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,float16,1,0.008826666822036108
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,64,128,1,float16,fp8,32767,0.02350933353106181
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,float16,32767,0.2034346659978231
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,float16,1,0.008682666967312494
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,64,0,1,float16,fp8,32767,0.17640000581741333
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,fp8,1,0.009098666409651438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,fp8,1,0.008954666554927826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,float16,3,0.008725333337982496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,float16,3,0.008725333337982496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,fp8,3,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,fp8,3,0.010442666709423065
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,float16,7,0.008693333094318708
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,float16,7,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,fp8,7,0.009050666665037474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,fp8,7,0.008901333436369896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,float16,15,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,float16,15,0.008757333581646284
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,fp8,15,0.008949333180983862
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,fp8,15,0.00903466654320558
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,float16,31,0.009002666920423508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,float16,31,0.008778666456540426
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,fp8,31,0.00901333304742972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,fp8,31,0.00921066664159298
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,float16,63,0.009050666665037474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,float16,63,0.008789333204428354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,fp8,63,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,fp8,63,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,float16,127,0.009119999905427298
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,float16,127,0.009103999783595404
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,fp8,127,0.013034666577974955
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,fp8,127,0.012821332861979803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,float16,255,0.00890666681031386
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,float16,255,0.009162666896979014
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,fp8,255,0.013050666699806849
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,fp8,255,0.012757333616415659
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,float16,511,0.019205333044131596
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,float16,511,0.021013334393501282
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,fp8,511,0.012709333250919977
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,fp8,511,0.012778667112191519
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,float16,2047,0.029616000751654308
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,fp8,2047,0.023200000325838726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,float16,1023,0.01929066702723503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,float16,1023,0.0233599990606308
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,fp8,1023,0.023391999304294586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,fp8,4095,0.023056000471115112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,fp8,1023,0.027456000447273254
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,float16,2047,0.019173332800467808
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,fp8,2047,0.031514666974544525
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,fp8,8191,0.023205332458019257
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,float16,4095,0.01907733331123988
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,float16,4095,0.041477332512537636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,fp8,4095,0.041477332512537636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,float16,8191,0.019205333044131596
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,float16,16383,0.11116799712181091
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,float16,8191,0.06412266691525777
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,fp8,8191,0.060218666990598045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,float16,16383,0.01912533367673556
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,fp8,16383,0.02333866556485494
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,float16,32767,0.20336532592773438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,fp8,16383,0.09889599680900574
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,float16,32767,0.01911466692884763
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,64,128,1,float16,fp8,32767,0.02332266668478648
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,float16,1,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,float16,1,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,64,0,1,float16,fp8,32767,0.17497599124908447
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,fp8,1,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,fp8,1,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,float16,3,0.008938666433095932
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,float16,3,0.009786666681369146
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,fp8,3,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,fp8,3,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,float16,7,0.00996800015370051
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,float16,7,0.009039999917149544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,fp8,7,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,fp8,7,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,float16,15,0.008992000172535578
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,float16,15,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,fp8,15,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,fp8,15,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,float16,31,0.009093333035707474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,float16,31,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,fp8,31,0.010522666076819101
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,fp8,31,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,float16,63,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,float16,63,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,fp8,63,0.010629333555698395
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,fp8,63,0.01097600037852923
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,float16,127,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,float16,127,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,fp8,127,0.010608000059922537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,fp8,127,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,float16,255,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,float16,255,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,fp8,255,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,fp8,255,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,float16,511,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,fp8,1023,0.010970667004585266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,float16,511,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,fp8,511,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,fp8,511,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,float16,1023,0.010415999839703241
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,float16,1023,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,fp8,1023,0.010656000425418219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,float16,2047,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,float16,2047,0.011584000041087469
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,fp8,2047,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,fp8,2047,0.012730666746695837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,float16,4095,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,float16,4095,0.013797332843144735
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,fp8,4095,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,fp8,4095,0.014938666174809137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,float16,8191,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,float16,8191,0.016469333320856094
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,fp8,8191,0.011098666737476984
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,fp8,8191,0.016794666647911072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,float16,16383,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,float16,32767,0.02126399924357732
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,float16,16383,0.017269333203633625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,fp8,16383,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,fp8,16383,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,float16,32767,0.01108266661564509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,float16,1,0.009066666786869368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,float16,1,0.009509333098928133
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,64,128,1,float16,fp8,32767,0.010944000134865442
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,64,0,1,float16,fp8,32767,0.020949333906173706
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,fp8,1,0.009775999933481216
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,fp8,1,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,float16,3,0.010645333677530289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,float16,3,0.009423999736706415
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,fp8,3,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,fp8,3,0.009594666461149851
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,float16,7,0.008997333546479544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,float16,7,0.00903466654320558
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,fp8,7,0.009413333609700203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,fp8,7,0.010645333677530289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,float16,15,0.010901333143313726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,float16,15,0.009488000224033991
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,fp8,15,0.01091733326514562
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,fp8,15,0.009674666449427605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,float16,31,0.00933333362142245
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,float16,31,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,fp8,31,0.009434666484594345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,fp8,31,0.010650667051474253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,float16,63,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,float16,63,0.01002133327225844
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,fp8,63,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,fp8,63,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,float16,127,0.008954666554927826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,float16,127,0.010266666611035665
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,fp8,127,0.009018666421373686
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,fp8,127,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,float16,255,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,float16,255,0.009829333052039146
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,fp8,255,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,fp8,255,0.010213333492477735
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,float16,511,0.009114666531483332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,float16,511,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,fp8,511,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,fp8,1023,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,fp8,511,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,float16,1023,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,float16,1023,0.010656000425418219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,fp8,1023,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,float16,2047,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,float16,2047,0.01314666618903478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,fp8,2047,0.010970667004585266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,fp8,4095,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,fp8,2047,0.012890666723251343
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,float16,4095,0.011402666568756104
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,float16,4095,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,fp8,4095,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,float16,8191,0.011071999867757162
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,float16,8191,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,fp8,8191,0.011120000233252844
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,fp8,8191,0.014965333044528961
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,float16,16383,0.011440000186363855
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,float16,16383,0.019215999792019527
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,fp8,16383,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,fp8,16383,0.01889066646496455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,float16,32767,0.010885333021481832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,float16,1,0.048154667019844055
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,64,128,1,float16,fp8,32767,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,float16,32767,0.02442666639884313
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,float16,1,0.050698667764663696
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,64,0,1,float16,fp8,32767,0.023007998863856
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,fp8,1,0.039850667119026184
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,fp8,1,0.03962666789690653
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,float16,3,0.04967466493447622
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,float16,3,0.05086933573087057
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,fp8,7,0.041450666884581246
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,fp8,3,0.03949866692225138
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,fp8,3,0.03984000037113825
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,float16,7,0.049786667029062905
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,float16,7,0.050026665131251015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,fp8,7,0.041493333876132965
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,float16,15,0.051669334371884666
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,float16,15,0.05186666548252106
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,fp8,15,0.04339733223120371
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,fp8,15,0.04193066557248434
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,float16,31,0.059994667768478394
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,float16,31,0.06002133091290792
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,fp8,63,0.05381333331267039
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,fp8,31,0.05376533170541128
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,fp8,31,0.05381333331267039
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,float16,63,0.06035199761390686
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,float16,63,0.060085331400235496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,fp8,63,0.05390933156013489
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,float16,255,0.07125866909821828
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,float16,127,0.06028266747792562
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,float16,127,0.06019733349482218
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,float16,511,0.06157866617043813
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,fp8,127,0.053861334919929504
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,fp8,511,0.05401599903901418
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,fp8,127,0.05383466680844625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,float16,255,0.060229331254959106
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,fp8,255,0.054085334142049156
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,fp8,255,0.06423999865849812
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,float16,511,0.10495466987291972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,fp8,511,0.09718933701515198
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,float16,1023,0.06032533446947733
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,float16,1023,0.17254932721455893
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,fp8,1023,0.053861334919929504
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,fp8,1023,0.16470932960510254
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,float16,2047,0.06213866670926412
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,fp8,2047,0.05375466744105021
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,float16,2047,0.3087199926376343
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,fp8,2047,0.30106133222579956
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,float16,1,0.04783466458320618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,float16,4095,0.060415998101234436
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,64,128,1,float16,fp8,4095,0.05395199855168661
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,float16,4095,0.5824426809946696
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,float16,1,0.04976533353328705
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,fp8,1,0.039733332892258964
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,fp8,3,0.039642666776975
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,64,0,1,float16,fp8,4095,0.5718719959259033
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,fp8,1,0.039749334255854286
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,float16,3,0.0479360024134318
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,float16,3,0.04901333153247833
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,fp8,3,0.03978666663169861
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,float16,15,0.051034669081370033
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,float16,7,0.05008000135421753
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,float16,7,0.0499946673711141
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,float16,31,0.06019733349482218
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,fp8,7,0.0414986660083135
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,fp8,7,0.041690667470296226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,float16,15,0.05171733101209005
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,fp8,15,0.0436106671889623
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,fp8,15,0.044581333796183266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,float16,31,0.06020799775918325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,fp8,31,0.05373333394527435
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,fp8,31,0.05286933481693268
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,float16,63,0.060234665870666504
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,float16,63,0.06000000238418579
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,fp8,63,0.05395199855168661
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,fp8,63,0.05387733379999796
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,float16,127,0.06067200005054474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,float16,127,0.060234665870666504
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,fp8,255,0.05374933282534281
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,fp8,127,0.053823997577031456
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,fp8,127,0.05402133365472158
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,float16,255,0.06027733286221822
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,float16,255,0.07133333384990692
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,fp8,255,0.06407466530799866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,float16,511,0.06067200005054474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,float16,511,0.10492799679438274
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,fp8,1023,0.05412266651789347
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,fp8,511,0.053717335065205894
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,fp8,1023,0.1646346648534139
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,fp8,511,0.09715200463930766
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,float16,1023,0.06025066475073496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,float16,1023,0.17268266280492148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,float16,2047,0.06025599936644236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,fp8,2047,0.053743998209635414
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,float16,2047,0.3082080086072286
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,fp8,2047,0.30023467540740967
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,float16,4095,0.06016000111897787
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,64,128,1,float16,fp8,4095,0.054085334142049156
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,float16,4095,0.5888426701227824
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,float16,1,0.013002666334311167
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,float16,1,0.012794667234023413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,fp8,1,0.012810666114091873
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,64,0,1,float16,fp8,4095,0.5722773472468058
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,fp8,1,0.01302933320403099
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,float16,3,0.013007999708255133
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,float16,3,0.012826666235923767
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,fp8,3,0.012746666868527731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,fp8,3,0.012874666601419449
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,float16,7,0.01292266696691513
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,float16,7,0.013114667187134424
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,fp8,7,0.012805332740147909
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,fp8,7,0.012794667234023413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,float16,31,0.012858666479587555
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,float16,15,0.012752000242471695
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,float16,15,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,fp8,15,0.012810666114091873
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,float16,63,0.012789333860079447
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,fp8,15,0.012831999609867731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,float16,31,0.012762666990359625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,fp8,31,0.012863999853531519
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,fp8,31,0.012752000242471695
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,float16,63,0.012634667257467905
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,fp8,63,0.012810666114091873
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,float16,255,0.01268799975514412
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,fp8,63,0.013050666699806849
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,float16,127,0.012698666503032049
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,float16,127,0.012805332740147909
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,fp8,127,0.012879999975363413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,float16,511,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,fp8,127,0.012858666479587555
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,float16,255,0.012693333129088083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,fp8,255,0.012960000584522883
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,fp8,255,0.012709333250919977
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,fp8,1023,0.012986666212479273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,float16,511,0.012831999609867731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,fp8,511,0.012821332861979803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,fp8,511,0.014005333185195923
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,float16,1023,0.012757333616415659
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,float16,1023,0.017071999609470367
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,fp8,1023,0.01714666684468587
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,float16,2047,0.014757333944241205
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,float16,2047,0.02701866626739502
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,fp8,2047,0.015040000279744467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,float16,1,0.01333333303531011
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,fp8,2047,0.025333332518736523
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,float16,4095,0.01479999969402949
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,fp8,1,0.013327999661366144
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,float16,4095,0.043866669138272606
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,64,128,1,float16,fp8,4095,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,64,0,1,float16,fp8,4095,0.03670933345953623
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,float16,1,0.013786666095256805
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,float16,7,0.01421333352724711
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,fp8,1,0.013034666577974955
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,float16,3,0.01498666654030482
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,float16,3,0.013877333452304205
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,fp8,3,0.013733333597580591
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,fp8,3,0.014074667046467463
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,float16,7,0.014874666929244995
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,fp8,7,0.013093333691358566
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,fp8,7,0.01498666654030482
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,float16,15,0.013482666263977686
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,float16,15,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,fp8,15,0.012906666845083237
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,fp8,15,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,float16,31,0.01379199946920077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,float16,31,0.015034666905800501
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,fp8,31,0.014111999422311783
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,fp8,31,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,float16,63,0.013877333452304205
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,float16,63,0.01358933374285698
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,fp8,63,0.013125333935022354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,fp8,63,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,float16,127,0.014912000546852747
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,float16,127,0.014943999548753103
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,fp8,127,0.014762666076421738
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,fp8,127,0.01313599944114685
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,float16,255,0.014991999914248785
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,float16,511,0.01759999990463257
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,float16,255,0.013925333817799887
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,fp8,255,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,fp8,255,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,float16,511,0.01479999969402949
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,fp8,511,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,fp8,511,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,float16,1023,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,float16,1023,0.02346666653951009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,fp8,1023,0.014762666076421738
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,fp8,1023,0.02269333352645238
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,float16,2047,0.01716800034046173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,float16,2047,0.041663999358812966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,fp8,2047,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,fp8,2047,0.0335413341720899
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,float16,4095,0.01716800034046173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,float16,4095,0.06381866832574208
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,64,128,1,float16,fp8,4095,0.01714133347074191
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,64,0,1,float16,fp8,4095,0.055914665261904396
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,float16,1,0.01479999969402949
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,float16,1,0.012896000097195307
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,fp8,1,0.008896000062425932
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,fp8,1,0.009093333035707474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,float16,3,0.013050666699806849
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,float16,3,0.012810666114091873
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,fp8,3,0.008778666456540426
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,fp8,3,0.009152000149091085
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,float16,7,0.012800000607967377
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,float16,7,0.012757333616415659
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,fp8,7,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,fp8,7,0.009072000160813332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,float16,15,0.013189333180586496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,float16,15,0.01313599944114685
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,fp8,15,0.009194666519761086
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,fp8,15,0.009119999905427298
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,float16,31,0.013221333424250284
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,float16,31,0.013141332815090815
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,fp8,31,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,fp8,31,0.010645333677530289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,float16,127,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,float16,63,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,float16,63,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,fp8,63,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,fp8,63,0.011178666104873022
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,float16,127,0.014997333288192749
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,fp8,127,0.012784000486135483
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,fp8,127,0.012752000242471695
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,float16,255,0.014991999914248785
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,float16,255,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,fp8,255,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,fp8,255,0.012453333785136541
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,float16,511,0.01492799942692121
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,float16,511,0.017258666455745697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,fp8,511,0.012837332983811697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,fp8,511,0.013066666821638743
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,float16,1023,0.014912000546852747
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,float16,1023,0.0229120006163915
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,fp8,1023,0.01116266722480456
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,fp8,1023,0.01904533306757609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,float16,2047,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,fp8,4095,0.01268799975514412
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,float16,2047,0.03323733309904734
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,fp8,2047,0.012890666723251343
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,fp8,2047,0.029152000943819683
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,float16,4095,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,float16,4095,0.05407466491063436
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,fp8,4095,0.0497920016447703
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,float16,8191,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,float16,8191,0.09515733520189922
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,fp8,8191,0.011125333607196808
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,fp8,8191,0.08861866593360901
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,float16,16383,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,fp8,16383,0.01118933285276095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,float16,16383,0.17673067251841226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,fp8,16383,0.1682986617088318
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,float16,32767,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,float16,1,0.012815999488035837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,float16,32767,0.340554674466451
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,float16,1,0.01310933381319046
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,64,128,1,float16,fp8,32767,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,fp8,1,0.008997333546479544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,64,0,1,float16,fp8,32767,0.3399306535720825
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,fp8,1,0.009818666925032934
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,float16,3,0.012906666845083237
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,float16,3,0.01312000056107839
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,fp8,3,0.009050666665037474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,fp8,3,0.009066666786869368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,float16,7,0.013210666676362356
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,float16,7,0.013114667187134424
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,fp8,7,0.008826666822036108
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,fp8,7,0.008890666688481966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,float16,15,0.0129120002190272
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,float16,15,0.013130666067202887
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,fp8,15,0.009114666531483332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,fp8,31,0.009290666629870733
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,fp8,15,0.009045333291093508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,float16,31,0.013125333935022354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,fp8,63,0.011567999919255575
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,fp8,63,0.012986666212479273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,float16,127,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,float16,31,0.013221333424250284
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,fp8,31,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,float16,63,0.014837333311637243
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,float16,63,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,float16,255,0.014901333798964819
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,float16,127,0.015354666858911514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,fp8,127,0.012805332740147909
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,fp8,127,0.012906666845083237
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,float16,255,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,fp8,511,0.01322666679819425
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,fp8,255,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,float16,1023,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,fp8,255,0.012821332861979803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,float16,511,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,fp8,1023,0.018986667195955913
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,float16,511,0.017290666699409485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,fp8,511,0.012842666357755661
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,float16,1023,0.023029332359631855
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,fp8,2047,0.029802667597929638
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,fp8,1023,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,float16,2047,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,float16,2047,0.033301333586374916
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,fp8,4095,0.04854933420817057
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,fp8,2047,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,float16,4095,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,float16,4095,0.05401599903901418
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,fp8,4095,0.013167999684810638
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,float16,8191,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,float16,8191,0.0950933297475179
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,fp8,8191,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,float16,16383,0.17672000328699747
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,fp8,16383,0.013045333325862885
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,fp8,8191,0.08886399865150452
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,float16,16383,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,fp8,16383,0.166810671488444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,float16,32767,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,float16,1,0.010992000500361124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,64,128,1,float16,fp8,32767,0.012416000167528788
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,float16,1,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,float16,32767,0.3413439989089966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,fp8,1,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,fp8,1,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,float16,3,0.009349333122372627
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,64,0,1,float16,fp8,32767,0.33881068229675293
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,float16,3,0.012655999511480331
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,fp8,3,0.010901333143313726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,fp8,3,0.010965333630641302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,float16,7,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,float16,7,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,fp8,7,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,fp8,7,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,float16,15,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,float16,15,0.010890666395425797
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,fp8,15,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,fp8,15,0.010954666882753372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,float16,31,0.009098666409651438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,float16,31,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,fp8,31,0.011146667102972666
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,fp8,31,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,float16,63,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,float16,63,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,fp8,63,0.010965333630641302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,fp8,63,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,float16,127,0.009359999870260557
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,float16,127,0.01090666651725769
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,fp8,127,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,fp8,127,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,float16,255,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,float16,255,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,fp8,255,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,fp8,255,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,float16,1023,0.011071999867757162
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,float16,511,0.009946666657924652
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,float16,511,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,fp8,511,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,fp8,511,0.01090666651725769
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,float16,1023,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,fp8,1023,0.010928000013033548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,fp8,1023,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,float16,4095,0.01523200049996376
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,float16,2047,0.01303999995191892
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,fp8,4095,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,float16,2047,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,fp8,2047,0.012762666990359625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,fp8,2047,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,float16,4095,0.012853333105643591
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,float16,16383,0.012815999488035837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,fp8,4095,0.012762666990359625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,float16,8191,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,float16,8191,0.017301333447297413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,fp8,8191,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,fp8,8191,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,float16,16383,0.021327999730904896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,fp8,16383,0.01292266696691513
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,fp8,16383,0.020975999534130096
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,float16,32767,0.012863999853531519
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,float16,32767,0.02718399961789449
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,64,128,1,float16,fp8,32767,0.013760000467300415
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,fp8,1,0.010885333021481832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,float16,3,0.009445333232482275
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,float16,1,0.010399999717871347
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,float16,1,0.009216000015536943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,64,0,1,float16,fp8,32767,0.02510933329661687
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,fp8,1,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,float16,3,0.009749333063761393
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,fp8,3,0.010330666477481524
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,fp8,3,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,float16,7,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,float16,7,0.009269333134094873
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,fp8,7,0.00973866693675518
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,fp8,7,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,float16,15,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,float16,15,0.009392000113924345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,fp8,15,0.010901333143313726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,fp8,15,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,float16,31,0.009130666653315226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,float16,31,0.009999999776482582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,fp8,31,0.010384000216921171
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,fp8,31,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,float16,63,0.008976000050703684
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,float16,63,0.011930666863918304
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,fp8,63,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,fp8,63,0.009493333597977957
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,float16,127,0.0106133334338665
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,float16,127,0.009226666763424873
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,fp8,127,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,fp8,127,0.010597333312034607
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,float16,255,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,float16,255,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,fp8,255,0.010591999938090643
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,fp8,255,0.009018666421373686
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,float16,511,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,float16,511,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,fp8,511,0.01098666712641716
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,fp8,511,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,float16,2047,0.01413333291808764
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,float16,1023,0.009408000235756239
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,float16,1023,0.011109333485364914
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,fp8,1023,0.014357333381970724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,float16,4095,0.014858666807413101
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,fp8,1023,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,float16,2047,0.01301866645614306
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,fp8,2047,0.012874666601419449
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,fp8,2047,0.013754667093356451
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,float16,4095,0.012853333105643591
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,fp8,4095,0.013258667041858038
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,fp8,4095,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,float16,8191,0.01108266661564509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,float16,8191,0.018181333939234417
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,fp8,8191,0.01303999995191892
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,fp8,8191,0.01728533332546552
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,float16,16383,0.012837332983811697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,float16,16383,0.025205334027608235
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,float16,32767,0.01146666705608368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,fp8,16383,0.012863999853531519
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,fp8,16383,0.021365332106749218
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,float16,32767,0.039488000174363456
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,64,128,1,float16,fp8,32767,0.01351999988158544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,64,128,1,float16,float16,1,0.09091200431187947
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,64,0,1,float16,float16,1,0.09062932928403218
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,64,0,1,float16,fp8,32767,0.03140799949566523
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,64,128,1,float16,fp8,1,0.07253333429495494
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,64,0,1,float16,fp8,1,0.07426666716734569
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,64,128,1,float16,float16,3,0.09062400460243225
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,64,0,1,float16,float16,3,0.09066667159398396
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,64,128,1,float16,fp8,3,0.0738453318675359
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,64,0,1,float16,fp8,3,0.07420266668001811
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,64,128,1,float16,float16,7,0.09301867087682088
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,64,0,1,float16,float16,7,0.09277866284052531
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,64,128,1,float16,fp8,7,0.07653866708278656
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,64,0,1,float16,fp8,7,0.07660266757011414
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,64,128,1,float16,float16,15,0.09513066212336223
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,64,0,1,float16,float16,15,0.09505066275596619
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,64,128,1,float16,fp8,15,0.07844799757003784
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,64,0,1,float16,fp8,15,0.07875733574231465
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,64,128,1,float16,float16,31,0.11335999766985576
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,64,0,1,float16,float16,31,0.11327466368675232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,64,128,1,float16,fp8,31,0.09920533498128255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,64,0,1,float16,fp8,31,0.0999679962793986
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,64,128,1,float16,float16,63,0.11390933394432068
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,64,0,1,float16,float16,127,0.11539199948310852
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,64,128,1,float16,fp8,127,0.1011893351872762
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,64,0,1,float16,float16,63,0.11355732878049214
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,64,128,1,float16,fp8,63,0.10132267077763875
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,64,0,1,float16,fp8,63,0.1011199951171875
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,64,128,1,float16,float16,127,0.11533332864443462
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,64,0,1,float16,fp8,127,0.10100266337394714
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,64,128,1,float16,float16,255,0.11529599626859029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,64,0,1,float16,float16,255,0.13578133781750998
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,64,128,1,float16,fp8,255,0.1009279986222585
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,64,0,1,float16,fp8,255,0.12141333023707072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,64,128,1,float16,float16,511,0.11544000109036763
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,64,0,1,float16,float16,511,0.20149334271748862
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,64,128,1,float16,fp8,511,0.10110933581988017
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,64,0,1,float16,fp8,511,0.18876800934473673
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,64,128,1,float16,float16,1023,0.11532266934712727
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,64,128,1,float16,fp8,1023,0.10133333007494609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,64,0,1,float16,float16,1023,0.3349013328552246
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,64,0,1,float16,fp8,1023,0.32205865780512494
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,64,128,1,float16,float16,2047,0.11542399724324544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,64,128,1,float16,fp8,2047,0.10129066308339436
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,64,128,1,float16,float16,1,0.0906986693541209
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,64,0,1,float16,float16,2047,0.6080960035324097
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,64,0,1,float16,float16,1,0.09262933333714803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,64,128,1,float16,fp8,1,0.07391466697057088
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,64,0,1,float16,fp8,2047,0.5916746854782104
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,64,0,1,float16,fp8,1,0.07424533367156982
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,64,128,1,float16,float16,3,0.09090133508046468
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,64,0,1,float16,float16,3,0.09108266234397888
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,64,128,1,float16,fp8,3,0.07422400017579396
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,64,0,1,float16,fp8,3,0.07501866420110066
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,64,128,1,float16,float16,7,0.09300800164540608
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,64,0,1,float16,float16,7,0.09296533465385437
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,64,128,1,float16,fp8,7,0.0765173335870107
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,64,0,1,float16,fp8,7,0.07662400106589
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,64,128,1,float16,float16,15,0.09490666786829631
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,64,0,1,float16,float16,15,0.09505599737167358
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,64,128,1,float16,fp8,15,0.07946133116881053
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,64,0,1,float16,fp8,15,0.07941333452860515
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,64,128,1,float16,float16,31,0.11318399508794148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,64,0,1,float16,float16,31,0.11328533291816711
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,64,128,1,float16,fp8,31,0.09916800260543823
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,64,0,1,float16,fp8,31,0.10061333576838176
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,64,128,1,float16,float16,63,0.11355200409889221
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,64,0,1,float16,float16,63,0.11520533760388692
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,64,0,1,float16,fp8,127,0.10129066308339436
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,64,128,1,float16,fp8,63,0.10086400310198466
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,64,128,1,float16,float16,255,0.11523733536402385
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,64,0,1,float16,fp8,63,0.10081600149472554
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,64,128,1,float16,float16,127,0.11530133088429768
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,64,0,1,float16,float16,127,0.1151146690050761
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,64,128,1,float16,fp8,127,0.1009279986222585
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,64,0,1,float16,float16,255,0.13573867082595825
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,64,128,1,float16,fp8,255,0.10097066561381023
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,64,0,1,float16,fp8,255,0.121370663245519
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,64,128,1,float16,float16,511,0.11526933312416077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,64,0,1,float16,float16,511,0.20151466131210327
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,64,128,1,float16,fp8,511,0.1011893351872762
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,64,0,1,float16,fp8,511,0.18759467204411825
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,64,128,1,float16,float16,1023,0.11587199568748474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,64,128,1,float16,fp8,1023,0.10096533099810283
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,64,128,1,float16,fp8,2047,0.10127466917037964
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,64,0,1,float16,float16,1023,0.33477334181467694
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,64,0,1,float16,fp8,1023,0.3224800030390422
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,64,128,1,float16,float16,2047,0.11546666423479716
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,64,128,1,float16,float16,1,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,64,0,1,float16,float16,2047,0.6172106663386027
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,64,0,1,float16,float16,1,0.016149333367745083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,64,128,1,float16,fp8,1,0.015253332753976187
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,64,0,1,float16,fp8,2047,0.5905119975407919
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,64,0,1,float16,fp8,1,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,64,128,1,float16,float16,3,0.01589866727590561
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,64,0,1,float16,float16,3,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,64,128,1,float16,fp8,3,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,64,0,1,float16,fp8,3,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,64,128,1,float16,float16,7,0.01533866673707962
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,64,128,1,float16,fp8,15,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,64,0,1,float16,float16,7,0.016293333222468693
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,64,128,1,float16,fp8,7,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,64,0,1,float16,fp8,7,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,64,128,1,float16,float16,15,0.015461333096027374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,64,0,1,float16,float16,15,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,64,0,1,float16,fp8,15,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,64,128,1,float16,float16,31,0.015253332753976187
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,64,0,1,float16,float16,31,0.016255999604860943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,64,128,1,float16,fp8,31,0.014837333311637243
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,64,0,1,float16,fp8,31,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,64,128,1,float16,float16,63,0.015295999745527903
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,64,0,1,float16,float16,63,0.015583999454975128
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,64,128,1,float16,fp8,63,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,64,0,1,float16,fp8,63,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,64,128,1,float16,float16,127,0.015562667200962702
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,64,0,1,float16,float16,127,0.01591466615597407
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,64,128,1,float16,fp8,127,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,64,0,1,float16,fp8,127,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,64,128,1,float16,float16,255,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,64,0,1,float16,float16,255,0.01632000009218852
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,64,128,1,float16,fp8,255,0.01681600014368693
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,64,0,1,float16,fp8,255,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,64,128,1,float16,float16,511,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,64,0,1,float16,float16,511,0.02062400057911873
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,64,128,1,float16,fp8,511,0.014975999792416891
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,64,0,1,float16,fp8,511,0.019130667050679524
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,64,128,1,float16,float16,1023,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,64,0,1,float16,float16,1023,0.0271519993742307
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,64,128,1,float16,fp8,2047,0.018250666558742523
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,64,128,1,float16,fp8,1023,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,64,0,1,float16,fp8,1023,0.025205334027608235
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,64,128,1,float16,float16,2047,0.017263999829689663
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,64,0,1,float16,float16,2047,0.04543999830881754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,64,0,1,float16,fp8,2047,0.03762666632731756
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,64,128,1,float16,float16,1,0.01905599981546402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,64,0,1,float16,float16,1,0.019050666441520054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,64,128,1,float16,fp8,1,0.017269333203633625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,64,0,1,float16,fp8,1,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,64,128,1,float16,float16,3,0.018986667195955913
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,64,0,1,float16,float16,3,0.018901333212852478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,64,128,1,float16,fp8,3,0.019679999599854153
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,64,0,1,float16,fp8,3,0.018922666708628338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,64,128,1,float16,float16,7,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,64,0,1,float16,float16,7,0.01894933357834816
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,64,128,1,float16,fp8,7,0.01899733394384384
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,64,0,1,float16,fp8,7,0.017269333203633625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,64,128,1,float16,float16,15,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,64,0,1,float16,float16,15,0.018885333091020584
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,64,128,1,float16,fp8,15,0.018901333212852478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,64,0,1,float16,fp8,15,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,64,128,1,float16,float16,31,0.018944000204404194
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,64,0,1,float16,float16,31,0.019002666076024372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,64,128,1,float16,fp8,31,0.017279999951521557
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,64,0,1,float16,fp8,63,0.01754666616519292
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,64,0,1,float16,fp8,31,0.017269333203633625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,64,128,1,float16,float16,63,0.01884799947341283
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,64,0,1,float16,float16,63,0.01894933357834816
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,64,128,1,float16,fp8,63,0.018960000326236088
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,64,128,1,float16,float16,127,0.018768000106016796
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,64,0,1,float16,float16,127,0.01884799947341283
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,64,128,1,float16,fp8,127,0.019050666441520054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,64,0,1,float16,fp8,127,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,64,128,1,float16,float16,255,0.019215999792019527
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,64,0,1,float16,float16,255,0.019002666076024372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,64,128,1,float16,fp8,255,0.01905599981546402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,64,0,1,float16,fp8,255,0.017808000246683758
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,64,128,1,float16,float16,511,0.018976000448067982
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,64,0,1,float16,float16,511,0.025055999557177227
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,64,128,1,float16,fp8,511,0.018895999838908512
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,64,0,1,float16,fp8,511,0.023103999594847362
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,64,128,1,float16,float16,1023,0.019061333189407986
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,64,128,1,float16,float16,2047,0.021695998807748158
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,64,0,1,float16,float16,1023,0.03766400118668874
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,64,128,1,float16,fp8,1023,0.018986667195955913
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,64,0,1,float16,fp8,1023,0.031184000273545582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,64,128,1,float16,fp8,2047,0.020975999534130096
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,64,0,1,float16,float16,2047,0.059893334905306496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,64,0,1,float16,fp8,2047,0.053685332338015236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,64,128,1,float16,float16,3,0.17414933443069458
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,64,128,1,float16,float16,1,0.17470399538675943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,64,0,1,float16,float16,1,0.17462400595347086
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,64,128,1,float16,fp8,1,0.14018133282661438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,64,0,1,float16,fp8,1,0.14010133345921835
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,64,0,1,float16,float16,3,0.17400532960891724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,64,128,1,float16,fp8,7,0.1460586686929067
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,64,128,1,float16,fp8,3,0.14005866646766663
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,64,0,1,float16,fp8,3,0.13990400234858194
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,64,128,1,float16,float16,7,0.17963733275731406
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,64,0,1,float16,float16,15,0.18313066164652506
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,64,0,1,float16,float16,7,0.17908267180124918
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,64,0,1,float16,fp8,7,0.14588266611099243
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,64,128,1,float16,float16,15,0.18286933501561484
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,64,128,1,float16,fp8,15,0.15213333566983542
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,64,0,1,float16,fp8,15,0.15204800168673197
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,64,128,1,float16,float16,31,0.21975467602411905
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,64,0,1,float16,float16,31,0.22184000412623087
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,64,0,1,float16,float16,63,0.2217973272005717
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,64,128,1,float16,fp8,31,0.19316800435384116
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,64,0,1,float16,fp8,31,0.19311465819676718
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,64,128,1,float16,float16,63,0.2218453288078308
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,64,128,1,float16,fp8,63,0.19477866093317667
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,64,0,1,float16,fp8,63,0.19499733050664267
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,64,0,1,float16,fp8,127,0.1952106753985087
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,64,128,1,float16,float16,127,0.22345600525538126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,64,0,1,float16,float16,127,0.22378667195638022
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,64,128,1,float16,fp8,127,0.19514133532842
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,64,128,1,float16,float16,255,0.22394667069117227
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,64,0,1,float16,float16,255,0.26315200328826904
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,64,128,1,float16,fp8,255,0.1954560081164042
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,64,0,1,float16,fp8,255,0.23435733715693155
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,64,128,1,float16,float16,511,0.22406933705012003
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,64,0,1,float16,float16,511,0.39603734016418457
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,64,128,1,float16,fp8,511,0.1964799960454305
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,64,128,1,float16,float16,1,0.17466133832931519
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,64,0,1,float16,fp8,511,0.36738133430480957
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,64,0,1,float16,float16,1,0.1738026738166809
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,64,128,1,float16,fp8,1,0.1402293344338735
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,64,0,1,float16,fp8,1,0.14044266939163208
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,64,128,1,float16,float16,3,0.17462400595347086
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,64,0,1,float16,float16,3,0.17466133832931519
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,64,128,1,float16,fp8,3,0.14155733585357666
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,64,0,1,float16,fp8,7,0.14619200428326926
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,64,0,1,float16,fp8,3,0.1402293344338735
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,64,128,1,float16,float16,7,0.17903999487559
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,64,0,1,float16,float16,7,0.17905600865681967
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,64,128,1,float16,fp8,7,0.14591999848683676
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,64,128,1,float16,float16,15,0.18315200010935465
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,64,0,1,float16,float16,15,0.18290134270985922
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,64,128,1,float16,fp8,15,0.15385599931081137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,64,0,1,float16,fp8,15,0.1521813372770945
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,64,128,1,float16,float16,31,0.21976532538731894
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,64,0,1,float16,float16,31,0.21979733308156332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,64,128,1,float16,fp8,31,0.1931040088335673
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,64,0,1,float16,fp8,31,0.19315733512242636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,64,128,1,float16,float16,63,0.22173333168029785
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,64,0,1,float16,float16,63,0.22165334224700928
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,64,128,1,float16,fp8,63,0.19670933485031128
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,64,0,1,float16,fp8,63,0.19510400295257568
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,64,128,1,float16,float16,127,0.22385066747665405
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,64,0,1,float16,float16,127,0.22386133670806885
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,64,128,1,float16,fp8,127,0.19535466035207114
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,64,0,1,float16,fp8,127,0.1954080065091451
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,64,128,1,float16,float16,255,0.22381333510080972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,64,0,1,float16,float16,255,0.26473599672317505
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,64,128,1,float16,fp8,255,0.19514666001001993
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,64,0,1,float16,fp8,255,0.23413866758346558
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,64,128,1,float16,fp8,511,0.19548799594243368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,64,128,1,float16,float16,511,0.22388267517089844
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,64,0,1,float16,float16,511,0.3956746657689412
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,64,128,1,float16,float16,1,0.021205333371957142
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,64,0,1,float16,fp8,511,0.36773331960042316
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,64,0,1,float16,float16,1,0.020917333662509918
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,64,0,1,float16,float16,3,0.02128533273935318
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,64,128,1,float16,fp8,1,0.021061333517233532
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,64,0,1,float16,fp8,1,0.02094399929046631
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,64,128,1,float16,float16,3,0.021333334346612293
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,64,128,1,float16,fp8,3,0.021061333517233532
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,64,0,1,float16,fp8,3,0.021007999777793884
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,64,128,1,float16,float16,7,0.021205333371957142
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,64,0,1,float16,float16,7,0.02123733361562093
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,64,0,1,float16,float16,15,0.02102400114138921
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,64,128,1,float16,fp8,7,0.021242665747801464
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,64,0,1,float16,fp8,15,0.02091199904680252
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,64,0,1,float16,fp8,7,0.021055998901526134
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,64,128,1,float16,float16,15,0.021253332495689392
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,64,128,1,float16,fp8,15,0.0210506667693456
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,64,128,1,float16,float16,31,0.021002667645613354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,64,0,1,float16,float16,31,0.02146666745344798
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,64,128,1,float16,fp8,31,0.02103466788927714
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,64,0,1,float16,fp8,31,0.021029333273569744
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,64,128,1,float16,float16,63,0.021231998999913532
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,64,0,1,float16,float16,63,0.021333334346612293
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,64,128,1,float16,fp8,63,0.021066665649414062
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,64,0,1,float16,fp8,63,0.02094399929046631
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,64,128,1,float16,float16,127,0.021498667697111767
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,64,0,1,float16,float16,127,0.021370666722456615
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,64,128,1,float16,fp8,127,0.021231998999913532
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,64,0,1,float16,fp8,127,0.021136000752449036
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,64,0,1,float16,float16,255,0.021407999098300934
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,64,128,1,float16,float16,255,0.0210506667693456
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,64,128,1,float16,fp8,255,0.021333334346612293
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,64,0,1,float16,fp8,255,0.020949333906173706
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,64,128,1,float16,float16,511,0.02161066730817159
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,64,0,1,float16,float16,511,0.02916266769170761
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,64,128,1,float16,fp8,511,0.021312000850836437
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,64,0,1,float16,fp8,511,0.027509334186712902
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,64,128,1,float16,float16,1,0.029077333708604176
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,64,0,1,float16,float16,1,0.02924799919128418
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,64,128,1,float16,fp8,1,0.027189334233601887
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,64,0,1,float16,fp8,1,0.02735466758410136
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,64,128,1,float16,float16,7,0.029466666281223297
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,64,128,1,float16,float16,3,0.02773333340883255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,64,0,1,float16,float16,3,0.029189333319664
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,64,128,1,float16,fp8,3,0.02735466758410136
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,64,0,1,float16,fp8,3,0.02749866743882497
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,64,0,1,float16,float16,7,0.029215998947620392
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,64,128,1,float16,fp8,7,0.027237333357334137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,64,0,1,float16,fp8,7,0.02741333345572154
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,64,128,1,float16,float16,15,0.02920000006755193
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,64,0,1,float16,float16,15,0.02789866675933202
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,64,128,1,float16,fp8,15,0.02719466636578242
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,64,0,1,float16,fp8,15,0.02739199995994568
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,64,128,1,float16,float16,31,0.029135999580224354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,64,0,1,float16,float16,31,0.029285334050655365
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,64,128,1,float16,fp8,31,0.027141332626342773
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,64,0,1,float16,fp8,31,0.027087998886903126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,64,128,1,float16,float16,63,0.029109333952267964
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,64,0,1,float16,float16,63,0.02773333340883255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,64,128,1,float16,fp8,63,0.02719466636578242
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,64,0,1,float16,fp8,127,0.027162666122118633
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,64,0,1,float16,fp8,63,0.027056001126766205
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,64,128,1,float16,float16,127,0.029178666571776073
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,64,0,1,float16,float16,127,0.029290666182835896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,64,128,1,float16,fp8,127,0.027263998985290527
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,64,128,1,float16,float16,255,0.029178666571776073
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,64,0,1,float16,float16,255,0.027669332921504974
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,64,128,1,float16,fp8,255,0.027429332335789997
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,64,0,1,float16,fp8,255,0.02714666724205017
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,64,128,1,float16,float16,511,0.029472000896930695
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,64,0,1,float16,float16,511,0.041477332512537636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,64,128,1,float16,fp8,511,0.027509334186712902
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,64,0,1,float16,fp8,511,0.03710933278004328
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,1,64,128,1,float16,float16,1,0.34094401200612384
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,1,64,0,1,float16,fp8,1,0.2731200059254964
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,1,64,0,1,float16,float16,1,0.3425706624984741
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,1,64,128,1,float16,fp8,1,0.2733760078748067
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,1,64,128,1,float16,float16,3,0.34252798557281494
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,1,64,0,1,float16,float16,3,0.3423200050989787
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,1,64,128,1,float16,fp8,3,0.27345067262649536
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,1,64,0,1,float16,fp8,3,0.2729919950167338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,1,64,128,1,float16,float16,7,0.35260268052419025
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,1,64,0,1,float16,float16,7,0.3526826699574788
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,1,64,128,1,float16,fp8,7,0.2858560085296631
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,1,64,0,1,float16,fp8,7,0.285258670647939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,1,64,128,1,float16,float16,15,0.3590986728668213
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,1,64,0,1,float16,fp8,15,0.29601067304611206
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,1,64,0,1,float16,float16,15,0.3593120177586873
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,1,64,128,1,float16,float16,31,0.43303998311360675
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,1,64,0,1,float16,float16,31,0.43304534753163654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,1,64,128,1,float16,fp8,15,0.2975626587867737
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,1,64,128,1,float16,fp8,31,0.3808533350626628
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,1,64,0,1,float16,fp8,31,0.3798666795094808
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,1,64,128,1,float16,float16,63,0.4367306629816691
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,1,64,0,1,float16,float16,63,0.43680532773335773
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,1,64,128,1,float16,fp8,63,0.3833920160929362
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,1,64,0,1,float16,fp8,63,0.3835039933522542
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,1,64,128,1,float16,float16,127,0.4408533175786336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,1,64,128,1,float16,fp8,127,0.38542401790618896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,1,64,0,1,float16,float16,127,0.4412959814071655
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,1,64,0,1,float16,fp8,127,0.38381866614023846
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,1,64,128,1,float16,float16,255,0.44121066729227704
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,1,64,0,1,float16,float16,255,0.5209120114644369
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,1,64,128,1,float16,fp8,255,0.38542401790618896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,1,64,0,1,float16,fp8,255,0.46346131960550946
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,2,64,128,1,float16,float16,1,0.3407680193583171
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,2,64,128,1,float16,fp8,1,0.27431466182072956
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,2,64,0,1,float16,float16,1,0.341973344484965
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,2,64,0,1,float16,fp8,1,0.27509333690007526
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,2,64,128,1,float16,float16,3,0.34090665976206463
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,2,64,0,1,float16,float16,3,0.3408426841100057
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,2,64,128,1,float16,fp8,3,0.27583465973536175
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,2,64,0,1,float16,fp8,3,0.2751839955647786
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,2,64,128,1,float16,float16,7,0.35275201002756756
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,2,64,128,1,float16,fp8,7,0.285370667775472
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,2,64,0,1,float16,fp8,7,0.2853066722551982
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,2,64,0,1,float16,float16,7,0.35387734572092694
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,2,64,128,1,float16,float16,15,0.35922666390736896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,2,64,0,1,float16,float16,15,0.3596693277359009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,2,64,128,1,float16,fp8,15,0.2974826693534851
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,2,64,0,1,float16,fp8,15,0.29756800333658856
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,2,64,128,1,float16,float16,31,0.433296004931132
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,2,64,0,1,float16,float16,31,0.434005339940389
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,2,64,0,1,float16,fp8,31,0.379749337832133
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,2,64,128,1,float16,float16,63,0.4371466636657715
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,2,64,128,1,float16,fp8,31,0.38075733184814453
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,2,64,0,1,float16,float16,63,0.43703468640645343
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,2,64,128,1,float16,fp8,63,0.3836053212483724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,2,64,0,1,float16,fp8,63,0.3834293286005656
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,2,64,128,1,float16,float16,127,0.4410613377888997
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,2,64,0,1,float16,float16,127,0.4410826762517293
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,2,64,128,1,float16,fp8,127,0.3855839967727661
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,2,64,0,1,float16,float16,255,0.5205599864323934
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,4,64,128,1,float16,float16,1,0.03331200033426285
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,2,64,128,1,float16,fp8,255,0.3854453166325887
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,2,64,0,1,float16,fp8,127,0.3866026798884074
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,2,64,128,1,float16,float16,255,0.4410719871520996
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,4,64,0,1,float16,float16,1,0.03352533280849457
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,2,64,0,1,float16,fp8,255,0.4620320002237956
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,4,64,128,1,float16,fp8,1,0.03435199956099192
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,4,64,0,1,float16,fp8,1,0.03283733377854029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,4,64,128,1,float16,float16,7,0.03365866591533025
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,4,64,128,1,float16,float16,3,0.03358400116364161
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,4,64,0,1,float16,float16,3,0.03402133285999298
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,4,64,128,1,float16,fp8,3,0.03344533344109853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,4,64,0,1,float16,fp8,7,0.03327466547489166
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,4,64,128,1,float16,float16,15,0.033557333052158356
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,4,64,0,1,float16,fp8,3,0.03345600018898646
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,4,64,0,1,float16,float16,7,0.033615998923778534
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,4,64,128,1,float16,fp8,7,0.03330666571855545
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,4,64,0,1,float16,float16,15,0.03405333310365677
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,4,64,128,1,float16,fp8,15,0.03335466732581457
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,4,64,0,1,float16,fp8,15,0.033674667278925575
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,4,64,128,1,float16,float16,31,0.03374933451414108
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,4,64,0,1,float16,float16,31,0.03352533280849457
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,4,64,128,1,float16,fp8,31,0.03329599897066752
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,4,64,0,1,float16,fp8,31,0.03302400062481562
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,4,64,0,1,float16,fp8,63,0.033039999504884086
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,4,64,128,1,float16,float16,63,0.03332799921433131
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,4,64,0,1,float16,float16,63,0.033733333150545754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,4,64,128,1,float16,fp8,63,0.03324799984693527
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,4,64,128,1,float16,float16,127,0.03450666616360346
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,4,64,0,1,float16,float16,127,0.033626665671666466
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,4,64,0,1,float16,fp8,127,0.03330666571855545
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,4,64,128,1,float16,fp8,255,0.033530667424201965
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,4,64,128,1,float16,fp8,127,0.033930666744709015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,4,64,0,1,float16,float16,255,0.03396799912055334
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,8,64,0,1,float16,float16,1,0.04770133395989736
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,4,64,128,1,float16,float16,255,0.03341866781314214
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,4,64,0,1,float16,fp8,255,0.03325333446264267
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,8,64,128,1,float16,float16,3,0.047685335079828896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,8,64,128,1,float16,float16,1,0.047882666190465294
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,8,64,128,1,float16,fp8,1,0.045594667394955955
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,8,64,0,1,float16,fp8,1,0.04562666515509287
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,8,64,0,1,float16,float16,3,0.04769066472848257
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,8,64,128,1,float16,fp8,3,0.04553066690762838
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,8,64,128,1,float16,float16,7,0.047744000951449074
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,8,64,0,1,float16,fp8,3,0.044250667095184326
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,8,64,0,1,float16,float16,7,0.04753600060939789
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,8,64,128,1,float16,fp8,7,0.04478399952252706
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,8,64,0,1,float16,fp8,7,0.04552533229192098
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,8,64,128,1,float16,float16,15,0.04772266745567322
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,8,64,0,1,float16,float16,15,0.047839999198913574
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,8,64,128,1,float16,fp8,15,0.0454720010360082
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,8,64,0,1,float16,fp8,31,0.04553600152333578
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,8,64,0,1,float16,fp8,15,0.04385599990685781
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,8,64,128,1,float16,float16,31,0.047541335225105286
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,8,64,128,1,float16,fp8,31,0.04410133262475332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,8,64,0,1,float16,float16,31,0.04775999983151754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,8,64,128,1,float16,float16,63,0.047824000318845115
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,8,64,0,1,float16,float16,63,0.047728002071380615
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,8,64,128,1,float16,float16,127,0.047695999344189964
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,8,64,128,1,float16,fp8,63,0.045567999283472695
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,8,64,0,1,float16,fp8,63,0.04384533564249674
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,8,64,0,1,float16,float16,127,0.047466665506362915
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,8,64,128,1,float16,fp8,127,0.04385066529115041
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,8,64,0,1,float16,fp8,127,0.044218664367993675
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,float16,1,0.012901333471139273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,8,64,128,1,float16,float16,255,0.04910400013128916
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,8,64,0,1,float16,float16,255,0.04961066444714864
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,8,64,128,1,float16,fp8,255,0.0458186666170756
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,8,64,0,1,float16,fp8,255,0.04515733321507772
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,float16,3,0.012847999731699625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,float16,1,0.012986666212479273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,fp8,3,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,float16,7,0.01312000056107839
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,fp8,1,0.01479999969402949
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,fp8,1,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,float16,3,0.012805332740147909
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,fp8,3,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,float16,7,0.013167999684810638
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,fp8,15,0.014837333311637243
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,fp8,7,0.01526933287580808
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,fp8,7,0.01482133318980535
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,float16,15,0.012810666114091873
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,float16,15,0.012944000462690989
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,fp8,15,0.014938666174809137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,float16,31,0.013056000073750814
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,float16,31,0.01309866706530253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,fp8,31,0.01621866722901662
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,fp8,31,0.015247999380032221
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,float16,63,0.013050666699806849
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,float16,127,0.013728000223636627
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,float16,63,0.013013333082199097
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,fp8,63,0.01929066702723503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,fp8,63,0.01915733392039935
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,float16,127,0.013157332936922709
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,fp8,127,0.01933866615096728
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,fp8,127,0.019306667149066925
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,float16,255,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,float16,255,0.015285332997639975
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,fp8,255,0.019248000035683315
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,fp8,255,0.019205333044131596
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,float16,511,0.014864000181357065
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,float16,511,0.021082667013009388
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,fp8,511,0.01930133377512296
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,fp8,511,0.023056000471115112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,float16,2047,0.053727999329566956
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,float16,1023,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,float16,1023,0.03125333289305369
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,fp8,1023,0.019237333287795384
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,fp8,1023,0.033285332222779594
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,float16,2047,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,fp8,2047,0.019237333287795384
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,fp8,2047,0.052000001072883606
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,float16,4095,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,float16,4095,0.09510933359464009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,fp8,4095,0.019343999524911244
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,fp8,4095,0.09061333537101746
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,float16,16383,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,float16,8191,0.013050666699806849
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,float16,8191,0.17895466089248657
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,float16,16383,0.35443735122680664
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,fp8,8191,0.019066666563351948
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,fp8,8191,0.16645866632461548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,fp8,16383,0.019306667149066925
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,fp8,16383,0.3202986717224121
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,float16,32767,0.01498666654030482
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,float16,1,0.01301866645614306
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,float16,1,0.012842666357755661
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,64,128,1,float16,fp8,32767,0.019189332922299702
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,fp8,1,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,fp8,1,0.014858666807413101
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,float16,3,0.01322666679819425
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,float16,3,0.013199999928474426
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,float16,32767,0.9505386352539062
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,64,0,1,float16,fp8,32767,0.6706879933675131
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,fp8,3,0.01482133318980535
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,fp8,3,0.014736000448465347
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,float16,15,0.013125333935022354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,float16,7,0.012837332983811697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,float16,7,0.012752000242471695
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,fp8,7,0.014922666052977243
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,fp8,7,0.014975999792416891
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,float16,15,0.012879999975363413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,fp8,15,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,fp8,15,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,float16,31,0.013125333935022354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,float16,31,0.013221333424250284
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,fp8,31,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,fp8,31,0.01626666635274887
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,float16,63,0.01320533330241839
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,float16,63,0.013162666310866674
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,fp8,63,0.018986667195955913
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,fp8,63,0.019141333798567455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,float16,127,0.013557333499193192
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,float16,127,0.014122666170199713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,fp8,127,0.019248000035683315
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,fp8,127,0.019194666296243668
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,float16,255,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,float16,255,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,fp8,255,0.019365333020687103
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,fp8,255,0.01926400015751521
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,float16,511,0.014741333822409311
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,float16,511,0.02088533341884613
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,fp8,511,0.019333332777023315
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,fp8,511,0.023082666099071503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,float16,2047,0.014858666807413101
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,float16,1023,0.014826666563749313
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,float16,1023,0.03136533250411352
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,fp8,1023,0.01930133377512296
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,fp8,1023,0.03344533344109853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,float16,2047,0.052746668457984924
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,fp8,4095,0.019354666272799175
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,fp8,2047,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,fp8,2047,0.05197333296140035
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,float16,4095,0.014767999450365702
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,float16,8191,0.17901867628097534
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,float16,4095,0.09505599737167358
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,fp8,4095,0.09065600236256917
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,float16,8191,0.013088000317414602
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,fp8,8191,0.01929066702723503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,fp8,8191,0.16670932372411093
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,float16,16383,0.015024000157912573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,float16,32767,0.013162666310866674
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,fp8,16383,0.019237333287795384
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,float16,16383,0.35442133744557697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,fp8,16383,0.32045332590738934
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,float16,1,0.009797333429257074
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,float16,1,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,64,128,1,float16,fp8,32767,0.01922133316596349
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,fp8,1,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,fp8,1,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,fp8,3,0.010981333752473196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,float16,7,0.010608000059922537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,float16,3,0.009813333551088968
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,fp8,32767,0.6701493263244629
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,float16,3,0.010591999938090643
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,64,0,1,float16,float16,32767,0.9946666558583578
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,fp8,3,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,float16,7,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,fp8,7,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,fp8,7,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,float16,15,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,float16,15,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,fp8,15,0.01108266661564509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,fp8,15,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,float16,31,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,float16,31,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,fp8,31,0.0106133334338665
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,fp8,31,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,float16,63,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,float16,63,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,fp8,63,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,fp8,63,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,float16,127,0.010645333677530289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,float16,127,0.010944000134865442
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,fp8,127,0.010938666760921478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,fp8,127,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,float16,255,0.010949333508809408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,float16,255,0.010890666395425797
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,fp8,255,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,fp8,255,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,float16,511,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,float16,511,0.011242666592200598
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,fp8,511,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,fp8,511,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,float16,1023,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,float16,1023,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,fp8,1023,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,fp8,1023,0.01209066684047381
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,float16,4095,0.016832000265518825
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,float16,2047,0.011898666620254517
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,float16,2047,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,fp8,2047,0.012800000607967377
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,fp8,2047,0.01481066644191742
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,float16,4095,0.012773333738247553
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,fp8,4095,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,fp8,4095,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,float16,8191,0.012159999459981918
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,float16,8191,0.018976000448067982
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,fp8,8191,0.01310933381319046
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,fp8,16383,0.02536533276240031
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,fp8,8191,0.019199999670187633
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,float16,16383,0.013114667187134424
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,float16,16383,0.027210667729377747
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,fp8,16383,0.012863999853531519
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,float16,32767,0.011989332735538483
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,float16,1,0.010954666882753372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,float16,32767,0.04355733096599579
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,float16,1,0.010928000013033548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,64,128,1,float16,fp8,32767,0.013157332936922709
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,fp8,1,0.010373333469033241
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,64,0,1,float16,fp8,32767,0.03559466699759165
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,fp8,1,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,float16,3,0.009002666920423508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,fp8,7,0.01090666651725769
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,float16,3,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,fp8,3,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,fp8,3,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,float16,7,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,float16,7,0.009109333157539368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,fp8,7,0.009952000031868616
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,float16,15,0.00914666677514712
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,float16,15,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,fp8,15,0.010922666639089584
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,fp8,31,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,fp8,15,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,float16,31,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,float16,31,0.008933333059151968
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,fp8,31,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,float16,63,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,float16,63,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,fp8,127,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,fp8,63,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,fp8,63,0.01062400018175443
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,float16,127,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,float16,127,0.009098666409651438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,fp8,127,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,float16,255,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,float16,255,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,fp8,255,0.012138667205969492
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,fp8,255,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,float16,511,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,float16,511,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,fp8,511,0.01101333275437355
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,fp8,511,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,float16,1023,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,float16,2047,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,float16,1023,0.012858666479587555
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,fp8,1023,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,fp8,1023,0.011087999989589056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,float16,2047,0.01109333336353302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,fp8,2047,0.012896000097195307
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,fp8,2047,0.014922666052977243
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,float16,8191,0.02384000023206075
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,float16,4095,0.011567999919255575
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,float16,4095,0.01817600056529045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,fp8,4095,0.012730666746695837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,fp8,4095,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,float16,8191,0.012874666601419449
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,fp8,8191,0.01258133351802826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,fp8,8191,0.023077333966890972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,float16,16383,0.01293333371480306
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,float16,16383,0.039546666045983635
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,fp8,16383,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,fp8,16383,0.031557333966096245
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,float16,32767,0.012805332740147909
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,float16,32767,0.06325866778691609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,64,128,1,float16,fp8,32767,0.012634667257467905
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,64,0,1,float16,fp8,32767,0.05403199791908264
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,1,64,128,1,float16,float16,1,0.676911989847819
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,1,64,0,1,float16,float16,1,0.6766719818115234
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,1,64,128,1,float16,fp8,1,0.5385226806004842
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,1,64,0,1,float16,fp8,1,0.5394986470540365
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,1,64,128,1,float16,float16,3,0.6766933600107828
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,1,64,0,1,float16,float16,3,0.6766400337219238
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,1,64,128,1,float16,fp8,3,0.538693348566691
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,1,64,0,1,float16,fp8,3,0.5395093361536661
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,1,64,128,1,float16,float16,7,0.6979680061340332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,1,64,0,1,float16,float16,7,0.697317361831665
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,1,64,128,1,float16,fp8,7,0.5636853377024332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,1,64,0,1,float16,fp8,7,0.5637226502100626
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,1,64,128,1,float16,float16,15,0.7133066654205322
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,1,64,128,1,float16,fp8,15,0.5868800083796183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,1,64,0,1,float16,fp8,15,0.5882399876912435
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,1,64,128,1,float16,float16,31,0.8614880243937174
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,1,64,0,1,float16,float16,31,0.8608160018920898
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,1,64,128,1,float16,fp8,31,0.7542933622996012
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,1,64,0,1,float16,fp8,31,0.7542239824930826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,1,64,128,1,float16,float16,63,0.8675306638081869
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,1,64,128,1,float16,fp8,63,0.7600853443145752
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,1,64,0,1,float16,float16,63,0.8675519625345866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,1,64,0,1,float16,fp8,63,0.7603573004404703
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,1,64,128,1,float16,float16,127,0.875104029973348
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,1,64,128,1,float16,fp8,127,0.7627786795298258
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,1,64,0,1,float16,float16,127,0.874778668085734
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,1,64,0,1,float16,fp8,127,0.763813336690267
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,2,64,128,1,float16,float16,1,0.6767840385437012
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,2,64,0,1,float16,float16,1,0.6761439641316732
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,2,64,128,1,float16,fp8,1,0.5428640047709147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,2,64,0,1,float16,fp8,1,0.5420053402582804
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,2,64,128,1,float16,float16,3,0.6767679850260416
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,2,64,0,1,float16,float16,3,0.6767840385437012
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,2,64,128,1,float16,fp8,3,0.5432000160217285
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,2,64,0,1,float16,fp8,3,0.5431786775588989
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,2,64,128,1,float16,float16,7,0.6987360318501791
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,2,64,0,1,float16,float16,7,0.6985759735107422
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,2,64,128,1,float16,fp8,7,0.5639733473459879
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,1,64,0,1,float16,float16,15,0.7132586638132731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,2,64,0,1,float16,fp8,7,0.5642720063527426
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,2,64,128,1,float16,float16,15,0.713322639465332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,2,64,0,1,float16,float16,15,0.7133653163909912
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,2,64,128,1,float16,fp8,15,0.5882240136464437
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,2,64,0,1,float16,fp8,15,0.588207999865214
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,2,64,128,1,float16,float16,31,0.8614400227864584
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,2,64,0,1,float16,float16,31,0.8612266381581625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,2,64,128,1,float16,fp8,31,0.7549813588460287
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,2,64,0,1,float16,fp8,31,0.7545920213063558
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,2,64,128,1,float16,float16,63,0.8674879868825277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,2,64,0,1,float16,float16,63,0.8683359622955322
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,2,64,128,1,float16,fp8,63,0.7600106398264567
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,2,64,0,1,float16,fp8,63,0.760159969329834
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,2,64,128,1,float16,float16,127,0.8757226467132568
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,2,64,0,1,float16,float16,127,0.8752906322479248
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,4,64,128,1,float16,float16,1,0.05811200042565664
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,2,64,128,1,float16,fp8,127,0.7628586292266846
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,4,64,0,1,float16,float16,1,0.058117335041364036
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,2,64,0,1,float16,fp8,127,0.7635040283203125
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,4,64,128,1,float16,fp8,1,0.05589866638183594
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,4,64,0,1,float16,fp8,1,0.056133334835370384
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,4,64,128,1,float16,float16,3,0.058277333776156105
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,4,64,0,1,float16,float16,3,0.05813866853713989
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,4,64,128,1,float16,fp8,3,0.05596266686916351
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,4,64,0,1,float16,fp8,3,0.05593599875768026
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,4,64,128,1,float16,float16,7,0.0577706644932429
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,4,64,0,1,float16,float16,7,0.0582239975531896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,4,64,128,1,float16,fp8,7,0.05598933498064677
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,4,64,0,1,float16,fp8,7,0.056277334690093994
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,4,64,128,1,float16,float16,15,0.05801600217819214
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,4,64,0,1,float16,float16,15,0.05807466804981232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,4,64,128,1,float16,fp8,15,0.05584000051021576
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,4,64,0,1,float16,fp8,15,0.05605866511662801
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,4,64,128,1,float16,float16,31,0.05795200169086456
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,4,64,0,1,float16,float16,31,0.0581226646900177
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,4,64,128,1,float16,fp8,31,0.05598400036493937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,4,64,0,1,float16,fp8,31,0.056133334835370384
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,4,64,128,1,float16,float16,63,0.05796800057093302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,4,64,0,1,float16,float16,63,0.05783466498057047
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,4,64,128,1,float16,fp8,63,0.05583466589450836
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,4,64,0,1,float16,fp8,63,0.05612266560395559
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,4,64,128,1,float16,float16,127,0.05992533266544342
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,4,64,0,1,float16,float16,127,0.05858666698137919
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,4,64,128,1,float16,fp8,127,0.05613866448402405
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,4,64,0,1,float16,fp8,127,0.055786664287249245
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,8,64,128,1,float16,float16,1,0.08473066488901775
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,8,64,128,1,float16,fp8,1,0.08030933141708374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,8,64,0,1,float16,fp8,1,0.08052800099054973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,8,64,0,1,float16,float16,1,0.08496532837549846
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,8,64,128,1,float16,float16,3,0.08665066957473755
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,8,64,0,1,float16,float16,3,0.0848533312479655
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,8,64,128,1,float16,fp8,3,0.0804906686147054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,8,64,0,1,float16,fp8,3,0.0801386684179306
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,8,64,128,1,float16,float16,7,0.08521599570910136
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,8,64,0,1,float16,float16,7,0.08658666412035625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,8,64,128,1,float16,fp8,7,0.0804746647675832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,8,64,0,1,float16,fp8,7,0.08035199840863545
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,8,64,128,1,float16,float16,15,0.08646399776140849
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,8,64,128,1,float16,fp8,15,0.08070933322111766
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,8,64,0,1,float16,float16,15,0.08496000369389851
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,8,64,0,1,float16,fp8,15,0.07979733248551686
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,8,64,128,1,float16,float16,31,0.08545066912968953
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,8,64,0,1,float16,float16,31,0.0865066647529602
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,8,64,0,1,float16,fp8,31,0.08044266700744629
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,8,64,128,1,float16,fp8,31,0.08037866652011871
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,8,64,128,1,float16,float16,63,0.08613333106040955
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,8,64,0,1,float16,float16,63,0.0865760048230489
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,8,64,128,1,float16,fp8,63,0.08038933575153351
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,8,64,0,1,float16,fp8,63,0.08037866652011871
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,8,64,128,1,float16,float16,127,0.08719467123349507
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,8,64,0,1,float16,float16,127,0.08669867118199666
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,8,64,0,1,float16,fp8,127,0.08037333190441132
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,8,64,128,1,float16,fp8,127,0.08041599889596303
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,1,64,128,1,float16,float16,1,1.3473493258158367
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,1,64,128,1,float16,fp8,1,1.0702239672342937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,1,64,0,1,float16,float16,1,1.3467787106831868
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,1,64,0,1,float16,fp8,1,1.0681440035502117
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,1,64,128,1,float16,float16,3,1.3478986422220867
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,1,64,128,1,float16,fp8,3,1.0710240205128987
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,1,64,0,1,float16,float16,3,1.347866694132487
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,1,64,0,1,float16,fp8,3,1.069423993428548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,1,64,128,1,float16,float16,7,1.3887252807617188
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,1,64,128,1,float16,fp8,7,1.121077299118042
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,1,64,0,1,float16,float16,7,1.3879520098368328
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,1,64,0,1,float16,fp8,7,1.1211573282877605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,1,64,128,1,float16,float16,15,1.4209334055582683
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,1,64,128,1,float16,fp8,15,1.1686506271362305
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,1,64,0,1,float16,float16,15,1.4213600158691406
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,1,64,0,1,float16,fp8,15,1.1686240037282307
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,1,64,128,1,float16,float16,31,1.7162346839904785
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,1,64,128,1,float16,fp8,31,1.5027839342753093
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,1,64,0,1,float16,float16,31,1.7161332766215007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,1,64,0,1,float16,fp8,31,1.5024693806966145
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,1,64,128,1,float16,float16,63,1.730218728383382
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,1,64,0,1,float16,float16,63,1.730629285176595
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,1,64,128,1,float16,fp8,63,1.5136213302612305
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,1,64,0,1,float16,fp8,63,1.5131893157958984
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,2,64,128,1,float16,float16,1,1.3466933568318684
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,2,64,0,1,float16,float16,1,1.3462665875752766
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,2,64,128,1,float16,fp8,1,1.0785866578420003
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,2,64,0,1,float16,fp8,1,1.0790773232777913
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,2,64,128,1,float16,fp8,3,1.0792213280995686
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,2,64,128,1,float16,float16,3,1.348159948984782
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,2,64,0,1,float16,float16,3,1.3480106989542644
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,2,64,0,1,float16,fp8,3,1.0780373414357503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,2,64,128,1,float16,float16,7,1.3902506828308105
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,2,64,0,1,float16,float16,7,1.3899413744608562
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,2,64,128,1,float16,fp8,7,1.122389316558838
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,2,64,0,1,float16,fp8,7,1.1223999659220378
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,2,64,128,1,float16,float16,15,1.4201812744140625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,2,64,128,1,float16,fp8,15,1.1695679823557537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,2,64,0,1,float16,float16,15,1.4206080436706543
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,2,64,0,1,float16,fp8,15,1.1695146560668945
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,2,64,128,1,float16,float16,31,1.7189653714497883
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,2,64,128,1,float16,fp8,31,1.5028533935546875
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,2,64,0,1,float16,float16,31,1.7183359464009602
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,2,64,0,1,float16,fp8,31,1.5028479894002278
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,2,64,128,1,float16,float16,63,1.7468959490458171
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,2,64,0,1,float16,float16,63,1.746773401896159
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,4,64,128,1,float16,float16,1,0.10905067125956218
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,4,64,0,1,float16,float16,1,0.1090186635653178
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,4,64,128,1,float16,fp8,1,0.10114133358001709
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,4,64,0,1,float16,fp8,1,0.1009333332379659
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,2,64,128,1,float16,fp8,63,1.5134240786234539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,2,64,0,1,float16,fp8,63,1.5149760246276855
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,4,64,128,1,float16,float16,3,0.10916800300280254
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,4,64,0,1,float16,float16,3,0.10802666346232097
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,4,64,128,1,float16,fp8,3,0.10116266210873921
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,4,64,0,1,float16,fp8,3,0.10105066498120625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,4,64,128,1,float16,float16,7,0.10930132865905762
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,4,64,0,1,float16,float16,7,0.10874666770299275
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,4,64,128,1,float16,fp8,7,0.10101866722106934
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,4,64,0,1,float16,fp8,7,0.1013813316822052
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,4,64,128,1,float16,float16,15,0.10903466741243999
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,4,64,0,1,float16,float16,15,0.10864532987276714
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,4,64,128,1,float16,fp8,15,0.10124267141024272
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,4,64,0,1,float16,fp8,15,0.10159466663996379
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,4,64,128,1,float16,float16,31,0.10915733377138774
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,4,64,0,1,float16,float16,31,0.1090826690196991
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,4,64,128,1,float16,fp8,31,0.1009173293908437
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,4,64,0,1,float16,fp8,31,0.10160533587137859
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,4,64,128,1,float16,float16,63,0.10834667086601257
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,4,64,0,1,float16,float16,63,0.10844799876213074
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,4,64,128,1,float16,fp8,63,0.10129066308339436
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,4,64,0,1,float16,fp8,63,0.1011840005715688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,8,64,128,1,float16,float16,1,0.16236799955368042
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,8,64,0,1,float16,float16,1,0.1622933348019918
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,8,64,128,1,float16,fp8,1,0.15054399768511453
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,8,64,0,1,float16,fp8,1,0.15108266472816467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,8,64,128,1,float16,float16,3,0.16243199507395426
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,8,64,0,1,float16,float16,3,0.16230400403340658
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,8,64,128,1,float16,fp8,3,0.15025066335995993
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,8,64,0,1,float16,fp8,3,0.15026133259137472
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,8,64,128,1,float16,float16,7,0.16201600432395935
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,8,64,0,1,float16,float16,7,0.16234133640925089
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,8,64,128,1,float16,fp8,7,0.15132799744606018
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,8,64,0,1,float16,fp8,7,0.1509173313776652
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,8,64,128,1,float16,float16,15,0.16226133704185486
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,8,64,0,1,float16,float16,15,0.1623199979464213
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,8,64,128,1,float16,fp8,15,0.15013866623242697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,8,64,0,1,float16,fp8,15,0.15011200308799744
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,8,64,0,1,float16,float16,31,0.1622666617234548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,8,64,128,1,float16,fp8,31,0.15019733707110086
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,8,64,0,1,float16,fp8,31,0.15133866667747498
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,float16,1,0.01724799970785777
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,float16,1,0.017162666966517765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,8,64,0,1,float16,float16,63,0.1623199979464213
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,fp8,1,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,fp8,1,0.01589866727590561
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,float16,3,0.01725333308180173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,float16,3,0.017231999586025875
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,8,64,128,1,float16,fp8,63,0.1500640014807383
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,fp8,3,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,8,64,0,1,float16,fp8,63,0.15009066462516785
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,fp8,7,0.01481066644191742
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,fp8,3,0.01479999969402949
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,float16,7,0.018325333793958027
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,float16,7,0.018853332847356796
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,fp8,7,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,float16,15,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,float16,15,0.017290666699409485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,fp8,15,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,fp8,15,0.015013333410024643
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,float16,31,0.019215999792019527
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,float16,31,0.019226666539907455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,fp8,31,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,fp8,31,0.017173333714405697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,float16,63,0.019258666783571243
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,float16,63,0.01937066639463107
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,fp8,63,0.017162666966517765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,fp8,63,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,float16,127,0.019237333287795384
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,float16,127,0.019402666638294857
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,fp8,127,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,8,64,128,1,float16,float16,31,0.1612320045630137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,fp8,255,0.020981334149837494
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,fp8,127,0.017237332959969837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,float16,255,0.019621333728233974
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,float16,255,0.022976001103719074
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,fp8,255,0.01714666684468587
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,float16,511,0.01937599976857503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,8,64,128,1,float16,float16,63,0.16218133767445883
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,float16,511,0.031343999008337654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,fp8,511,0.017136000096797943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,fp8,511,0.0296426663796107
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,float16,1023,0.019280000279347103
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,float16,1023,0.04950400193532308
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,fp8,1023,0.017269333203633625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,fp8,1023,0.04761599997679392
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,float16,2047,0.019621333728233974
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,float16,2047,0.08458133538564046
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,fp8,4095,0.017231999586025875
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,fp8,2047,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,fp8,2047,0.08261333405971527
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,float16,4095,0.019248000035683315
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,float16,4095,0.15447466572125754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,fp8,4095,0.15229866902033487
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,float16,8191,0.01918399954835574
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,fp8,8191,0.01725333308180173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,float16,8191,0.297818660736084
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,fp8,8191,0.29446399211883545
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,float16,16383,0.01926400015751521
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,64,128,1,float16,fp8,16383,0.017130666722853977
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,float16,1,0.01725333308180173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,float16,1,0.017317333569129307
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,float16,16383,0.6182133356730143
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,fp8,1,0.014794666320085526
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,fp8,1,0.014949332922697067
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,64,0,1,float16,fp8,16383,0.6214239994684855
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,float16,7,0.018565333137909572
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,float16,3,0.017136000096797943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,float16,3,0.01727466657757759
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,fp8,3,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,fp8,3,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,float16,7,0.017386666188637417
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,fp8,7,0.014912000546852747
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,fp8,7,0.014965333044528961
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,float16,15,0.017093333105246227
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,float16,15,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,fp8,15,0.014943999548753103
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,fp8,15,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,float16,31,0.019189332922299702
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,float16,31,0.019253333409627277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,fp8,31,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,fp8,31,0.017162666966517765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,float16,63,0.019354666272799175
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,float16,63,0.019306667149066925
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,fp8,63,0.01727466657757759
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,fp8,63,0.017162666966517765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,float16,127,0.019306667149066925
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,float16,127,0.019333332777023315
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,fp8,127,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,fp8,127,0.017125333348910015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,float16,255,0.020256000260512035
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,float16,255,0.023178666830062866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,fp8,255,0.017136000096797943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,fp8,255,0.020997333029905956
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,float16,511,0.02110933264096578
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,float16,511,0.03123733401298523
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,fp8,511,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,float16,2047,0.019637333850065868
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,fp8,511,0.029189333319664
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,float16,1023,0.019968000551064808
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,float16,1023,0.049770668148994446
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,fp8,1023,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,fp8,1023,0.04761599997679392
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,float16,2047,0.0844533344109853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,fp8,2047,0.017125333348910015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,fp8,2047,0.08271466692288716
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,float16,4095,0.01926400015751521
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,float16,4095,0.1551199952761332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,fp8,4095,0.017103999853134155
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,fp8,4095,0.15225066741307577
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,fp8,8191,0.2956479986508687
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,float16,8191,0.01931200052301089
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,float16,8191,0.2978026668230693
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,fp8,8191,0.017071999609470367
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,float16,16383,0.019152000546455383
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,float16,1,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,64,128,1,float16,fp8,16383,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,float16,1,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,float16,16383,0.6272533337275187
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,fp8,1,0.011114666859308878
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,fp8,3,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,fp8,1,0.010634666929642359
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,64,0,1,float16,fp8,16383,0.6201546589533488
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,float16,3,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,float16,3,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,fp8,3,0.010890666395425797
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,float16,7,0.01097600037852923
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,float16,7,0.010992000500361124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,fp8,7,0.01089599976936976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,fp8,7,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,float16,15,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,fp8,31,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,float16,15,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,float16,63,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,fp8,15,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,fp8,15,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,float16,31,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,float16,31,0.010922666639089584
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,fp8,31,0.010933333386977514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,float16,63,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,fp8,63,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,fp8,63,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,float16,127,0.010591999938090643
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,float16,127,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,fp8,127,0.011087999989589056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,fp8,127,0.01090666651725769
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,float16,255,0.010597333312034607
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,float16,255,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,fp8,255,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,fp8,255,0.01101333275437355
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,float16,511,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,float16,511,0.011381333072980246
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,fp8,511,0.011114666859308878
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,fp8,1023,0.012800000607967377
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,fp8,511,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,float16,1023,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,float16,1023,0.012784000486135483
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,fp8,1023,0.010981333752473196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,float16,2047,0.013088000317414602
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,float16,2047,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,fp8,2047,0.012869333227475485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,fp8,4095,0.019194666296243668
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,fp8,2047,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,float16,4095,0.013050666699806849
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,float16,4095,0.019823999454577763
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,fp8,4095,0.012752000242471695
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,float16,8191,0.01309866706530253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,float16,8191,0.027237333357334137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,fp8,8191,0.013082666943470636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,fp8,8191,0.025349333882331848
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,float16,16383,0.012837332983811697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,float16,16383,0.04427200059096018
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,64,128,1,float16,fp8,16383,0.012879999975363413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,float16,1,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,64,0,1,float16,fp8,16383,0.036559998989105225
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,float16,1,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,fp8,1,0.010933333386977514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,fp8,1,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,float16,3,0.010645333677530289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,float16,3,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,fp8,3,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,fp8,3,0.010656000425418219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,float16,7,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,float16,7,0.010954666882753372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,fp8,7,0.010933333386977514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,fp8,7,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,float16,31,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,float16,31,0.01062400018175443
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,float16,15,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,float16,15,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,fp8,15,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,fp8,15,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,fp8,31,0.011018666128317514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,fp8,31,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,float16,63,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,fp8,127,0.010970667004585266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,float16,63,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,fp8,63,0.011087999989589056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,fp8,63,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,float16,127,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,fp8,255,0.01091733326514562
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,float16,127,0.01109333336353302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,fp8,127,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,float16,255,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,float16,255,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,fp8,255,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,float16,511,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,float16,511,0.011109333485364914
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,fp8,511,0.011136000355084738
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,fp8,511,0.011424000064531961
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,float16,1023,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,float16,1023,0.01302933320403099
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,fp8,1023,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,fp8,1023,0.012666666259368261
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,float16,2047,0.012896000097195307
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,float16,2047,0.01786133274435997
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,fp8,2047,0.013093333691358566
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,fp8,2047,0.017194667210181553
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,float16,4095,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,float16,4095,0.02311466634273529
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,fp8,4095,0.012730666746695837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,fp8,4095,0.0229120006163915
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,float16,8191,0.012879999975363413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,float16,8191,0.03935466706752777
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,fp8,8191,0.012896000097195307
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,fp8,8191,0.03155199935038885
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,float16,16383,0.012831999609867731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,float16,3,0.02722666660944621
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,float16,16383,0.06163733204205831
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,64,128,1,float16,fp8,16383,0.012815999488035837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,float16,1,0.02611733227968216
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,64,0,1,float16,fp8,16383,0.053786665201187134
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,fp8,1,0.023354666928450268
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,fp8,3,0.023290666441122692
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,fp8,31,0.02922133356332779
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,float16,7,0.027087998886903126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,fp8,7,0.02510933329661687
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,float16,15,0.0315786674618721
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,fp8,15,0.029157333076000214
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,float16,31,0.031541332602500916
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,float16,63,0.03323200096686681
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,fp8,63,0.029322666426499683
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,fp8,511,0.08498666683832805
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,float16,127,0.037818667789300285
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,fp8,127,0.03534399966398875
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,float16,255,0.055786664287249245
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,fp8,255,0.051701332132021584
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,float16,511,0.08876799543698628
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,float16,1023,0.15877333283424377
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,fp8,1023,0.15247467160224915
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,float16,2047,0.2977760036786397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,fp8,2047,0.28544533252716064
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,float16,4095,0.5777013301849365
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,float16,1,0.02605333427588145
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,fp8,4095,0.55348801612854
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,fp8,1,0.023354666928450268
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,float16,7,0.027450665831565857
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,float16,3,0.027210667729377747
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,float16,15,0.03165333221356074
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,fp8,3,0.023183998962243397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,fp8,7,0.025146665672461193
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,float16,8191,1.1516640186309814
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,float16,63,0.031530665854612984
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,1,128,0,1,float16,fp8,8191,1.0930826663970947
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,fp8,15,0.02935466667016347
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,float16,31,0.031583999594052635
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,float16,255,0.05605333546797434
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,fp8,31,0.029472000896930695
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,float16,511,0.08922132849693298
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,fp8,63,0.029578665892283123
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,float16,127,0.03758399933576584
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,float16,1023,0.1585813363393148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,fp8,127,0.03528533379236857
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,fp8,255,0.05180799961090088
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,fp8,511,0.08516266942024231
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,fp8,1023,0.15244799852371216
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,float16,2047,0.2978453238805135
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,fp8,2047,0.2854880094528198
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,float16,1,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,float16,4095,0.5845066706339518
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,float16,3,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,fp8,4095,0.5532639821370443
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,fp8,7,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,fp8,1,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,fp8,3,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,float16,7,0.01091733326514562
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,float16,15,0.010970667004585266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,fp8,8191,1.102672020594279
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,2,128,0,1,float16,float16,8191,1.17084797223409
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,fp8,15,0.011066666493813196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,float16,31,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,fp8,31,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,float16,63,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,fp8,63,0.011087999989589056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,float16,127,0.010954666882753372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,fp8,127,0.010992000500361124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,float16,255,0.011178666104873022
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,fp8,255,0.01227733368674914
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,fp8,2047,0.019274666905403137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,float16,511,0.012618667135636011
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,float16,4095,0.039359999199708305
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,fp8,511,0.012773333738247553
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,float16,1023,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,float16,1,0.012885333349307379
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,fp8,1023,0.01310933381319046
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,float16,2047,0.021301334102948506
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,fp8,4095,0.027087998886903126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,float16,8191,0.05840533475081126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,4,128,0,1,float16,fp8,8191,0.042133331298828125
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,fp8,1,0.012853333105643591
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,float16,3,0.012890666723251343
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,fp8,3,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,float16,7,0.012831999609867731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,float16,63,0.012815999488035837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,fp8,7,0.012805332740147909
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,float16,127,0.012949333836634954
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,fp8,127,0.011557333171367645
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,float16,15,0.01309866706530253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,fp8,15,0.01110400011142095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,float16,31,0.012815999488035837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,fp8,31,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,float16,1023,0.018858666221300762
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,fp8,63,0.011120000233252844
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,float16,255,0.012997332960367203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,fp8,255,0.011061333119869232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,float16,511,0.01492799942692121
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,fp8,511,0.014485333114862442
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,fp8,1023,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,float16,2047,0.037471999724706016
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,fp8,2047,0.025087999800841015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,float16,4095,0.058864002426465355
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,fp8,4095,0.039594667653242745
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,float16,8191,0.10116799672444661
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,float16,7,0.008581333483258883
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,float16,1,0.008736000085870424
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,float16,15,0.008912000184257826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,64,8,128,0,1,float16,fp8,8191,0.06117866436640421
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,fp8,1,0.009002666920423508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,float16,3,0.008789333204428354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,fp8,3,0.009413333609700203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,fp8,7,0.009077333534757296
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,fp8,15,0.009514666472872099
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,float16,31,0.008842666943868002
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,fp8,31,0.010650667051474253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,float16,63,0.009008000294367472
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,fp8,63,0.012869333227475485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,float16,127,0.009061333412925402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,fp8,127,0.012810666114091873
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,float16,255,0.013034666577974955
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,fp8,255,0.012752000242471695
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,float16,511,0.021007999777793884
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,fp8,511,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,float16,1023,0.025424001117547352
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,fp8,1023,0.027562665442625683
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,float16,2047,0.0335413341720899
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,fp8,2047,0.03516799956560135
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,float16,4095,0.04801600178082784
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,fp8,4095,0.047914668917655945
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,float16,8191,0.08029866715272267
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,fp8,8191,0.0743146687746048
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,float16,16383,0.1418773333231608
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,fp8,16383,0.12545599540074667
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,float16,1,0.008613333106040955
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,fp8,1,0.009242666885256767
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,float16,32767,0.26500266790390015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,float16,3,0.008634666601816813
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,1,128,0,1,float16,fp8,32767,0.2278560002644857
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,fp8,3,0.009077333534757296
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,float16,7,0.008949333180983862
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,fp8,7,0.008943999807039896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,float16,15,0.00897066667675972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,fp8,15,0.008949333180983862
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,float16,31,0.009061333412925402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,fp8,31,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,float16,63,0.008757333581646284
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,fp8,63,0.012784000486135483
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,float16,127,0.009413333609700203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,fp8,127,0.013082666943470636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,float16,255,0.013061333447694778
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,fp8,255,0.012815999488035837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,float16,511,0.02102400114138921
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,fp8,511,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,float16,1023,0.025386666258176167
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,fp8,1023,0.02739733209212621
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,fp8,8191,0.07431999842325847
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,float16,2047,0.033530667424201965
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,fp8,2047,0.03532800078392029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,float16,4095,0.04967466493447622
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,fp8,4095,0.047839999198913574
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,float16,1,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,float16,8191,0.08005333443482716
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,float16,16383,0.1421226660410563
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,fp8,32767,0.22616000970204672
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,fp8,16383,0.12563733259836832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,fp8,1,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,2,128,0,1,float16,float16,32767,0.26477867364883423
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,float16,3,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,fp8,3,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,float16,7,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,fp8,7,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,float16,15,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,fp8,15,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,float16,31,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,fp8,31,0.010485333700974783
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,float16,63,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,fp8,63,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,float16,127,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,fp8,127,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,float16,255,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,fp8,255,0.010570666442314783
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,float16,511,0.011136000355084738
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,float16,4095,0.012815999488035837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,fp8,511,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,float16,1023,0.010970667004585266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,fp8,1023,0.01108266661564509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,float16,2047,0.010928000013033548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,fp8,2047,0.012341332932313284
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,fp8,4095,0.013050666699806849
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,float16,8191,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,fp8,8191,0.01695999999841054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,float16,16383,0.019333332777023315
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,float16,3,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,fp8,16383,0.018917333334684372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,float16,32767,0.021583999196688335
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,float16,1,0.010922666639089584
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,fp8,1,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,4,128,0,1,float16,fp8,32767,0.02107200026512146
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,fp8,3,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,float16,7,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,fp8,7,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,float16,15,0.009008000294367472
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,fp8,15,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,float16,31,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,fp8,31,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,float16,63,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,fp8,63,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,float16,127,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,fp8,127,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,float16,255,0.00901333304742972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,float16,2047,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,fp8,255,0.010618666807810465
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,float16,4095,0.013866666704416275
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,float16,511,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,fp8,511,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,float16,1023,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,fp8,1023,0.010922666639089584
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,fp8,2047,0.01110400011142095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,fp8,4095,0.013167999684810638
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,float16,8191,0.01684800038735072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,fp8,8191,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,float16,16383,0.01855466639002164
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,fp8,16383,0.017551999539136887
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,float16,1,0.008639999975760778
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,fp8,1,0.009045333291093508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,float16,32767,0.023071999351183575
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,64,8,128,0,1,float16,fp8,32767,0.019018666197856266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,float16,3,0.008618666479984919
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,fp8,3,0.009050666665037474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,float16,7,0.008752000207702318
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,fp8,7,0.009008000294367472
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,float16,15,0.0086666668454806
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,fp8,15,0.008938666433095932
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,float16,31,0.008821333448092142
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,fp8,31,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,float16,63,0.008922666932145754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,fp8,63,0.012847999731699625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,float16,127,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,fp8,127,0.012784000486135483
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,float16,255,0.020917333662509918
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,fp8,255,0.0129120002190272
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,float16,511,0.023050665855407715
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,fp8,511,0.02743999908367793
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,float16,1023,0.03032533327738444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,fp8,1023,0.03123733401298523
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,float16,2047,0.04147200038035711
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,fp8,2047,0.04144000013669332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,float16,4095,0.06444266438484192
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,float16,16383,0.20366400480270386
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,fp8,4095,0.06017066538333893
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,float16,8191,0.1113973359266917
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,fp8,8191,0.09891733527183533
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,fp8,16383,0.17537599802017212
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,float16,1,0.008826666822036108
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,fp8,1,0.010186666622757912
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,float16,3,0.008746666833758354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,float16,32767,0.3884640137354533
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,fp8,15,0.010890666395425797
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,1,128,0,1,float16,fp8,32767,0.3285599946975708
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,float16,63,0.009045333291093508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,fp8,3,0.00927466650803884
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,float16,7,0.008725333337982496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,fp8,7,0.010575999816258749
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,float16,15,0.008725333337982496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,float16,31,0.008938666433095932
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,fp8,31,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,fp8,63,0.012741333494583765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,float16,127,0.00903466654320558
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,fp8,127,0.01268799975514412
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,float16,255,0.021125334004561108
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,float16,2047,0.041482667128245033
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,fp8,255,0.012885333349307379
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,float16,4095,0.06428800026575725
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,float16,511,0.023045333723227184
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,fp8,511,0.02716800073782603
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,float16,1023,0.029322666426499683
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,fp8,1023,0.03145600110292435
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,fp8,2047,0.041493333876132965
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,fp8,4095,0.06038933495680491
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,float16,8191,0.1111306647459666
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,fp8,8191,0.09916266798973083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,float16,16383,0.20346667369206747
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,fp8,16383,0.17626667022705078
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,float16,1,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,fp8,1,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,float16,32767,0.3884799877802531
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,float16,3,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,2,128,0,1,float16,fp8,32767,0.33029333750406903
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,fp8,3,0.010618666807810465
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,float16,7,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,fp8,7,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,float16,15,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,fp8,15,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,float16,31,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,fp8,31,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,float16,63,0.010992000500361124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,fp8,63,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,float16,127,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,fp8,127,0.011061333119869232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,fp8,1023,0.011066666493813196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,float16,255,0.010656000425418219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,fp8,255,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,float16,511,0.010608000059922537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,fp8,511,0.010911999891201654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,float16,1023,0.011146667102972666
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,float16,2047,0.012960000584522883
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,fp8,2047,0.012847999731699625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,float16,4095,0.016693333784739178
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,fp8,4095,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,float16,8191,0.01729600007335345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,fp8,8191,0.01682666689157486
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,float16,16383,0.02000533292690913
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,fp8,16383,0.019109333554903667
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,float16,32767,0.02409599969784419
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,float16,1,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,4,128,0,1,float16,fp8,32767,0.02329600105683009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,fp8,1,0.0106133334338665
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,float16,3,0.009717333440979322
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,fp8,3,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,float16,7,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,fp8,7,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,float16,15,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,fp8,15,0.01091733326514562
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,float16,31,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,fp8,31,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,float16,63,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,fp8,63,0.010885333021481832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,float16,127,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,fp8,127,0.010650667051474253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,float16,255,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,fp8,255,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,float16,511,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,fp8,511,0.011141333729028702
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,float16,1023,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,fp8,1023,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,float16,2047,0.013045333325862885
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,fp8,2047,0.013173333058754602
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,float16,4095,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,fp8,4095,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,float16,8191,0.019066666563351948
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,fp8,8191,0.0162773331006368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,float16,16383,0.023071999351183575
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,fp8,16383,0.019130667050679524
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,float16,1,0.047872001926104225
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,float16,32767,0.038218667109807335
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,64,8,128,0,1,float16,fp8,32767,0.02325333406527837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,fp8,7,0.04362666606903076
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,fp8,1,0.04001066585381826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,float16,3,0.04809066653251648
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,fp8,3,0.041589332123597465
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,float16,7,0.049653331438700356
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,fp8,15,0.05205333232879639
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,float16,15,0.05829866727193197
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,float16,31,0.05993066728115082
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,fp8,31,0.05399466554323832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,float16,63,0.05993066728115082
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,fp8,63,0.05378133555253347
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,float16,127,0.07022933165232341
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,fp8,127,0.06401599943637848
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,float16,255,0.1032480001449585
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,fp8,255,0.09706133604049683
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,float16,511,0.17121599117914835
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,fp8,511,0.1627786656220754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,float16,2047,0.584384004275004
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,float16,1023,0.3096853295962016
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,fp8,1023,0.2934933304786682
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,float16,1,0.047824000318845115
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,fp8,2047,0.555727998415629
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,float16,7,0.04978133241335551
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,fp8,1,0.039877332746982574
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,float16,3,0.049770668148994446
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,fp8,3,0.04178666571776072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,float16,31,0.059893334905306496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,float16,4095,1.152085304260254
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,fp8,7,0.043493335445721946
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,1,128,0,1,float16,fp8,4095,1.0797866980234783
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,float16,15,0.0581226646900177
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,fp8,15,0.052095999320348106
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,fp8,31,0.05385600030422211
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,float16,63,0.060122668743133545
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,fp8,63,0.053727999329566956
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,float16,127,0.07051733136177063
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,fp8,127,0.06418666740258534
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,float16,255,0.10327466328938802
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,fp8,255,0.09707199533780415
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,float16,511,0.17112000783284506
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,fp8,511,0.16264533003171286
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,float16,1023,0.3096906741460164
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,fp8,1023,0.29375465710957843
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,float16,2047,0.5930080016454061
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,fp8,2047,0.555840015411377
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,fp8,3,0.013045333325862885
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,float16,1,0.013114667187134424
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,fp8,1,0.012853333105643591
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,float16,3,0.013077333569526672
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,fp8,15,0.012757333616415659
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,float16,4095,1.1663040320078533
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,float16,7,0.013376000026861826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,2,128,0,1,float16,fp8,4095,1.0893706480662029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,fp8,7,0.013189333180586496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,float16,15,0.013114667187134424
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,float16,31,0.012869333227475485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,fp8,31,0.01310933381319046
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,float16,63,0.012901333471139273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,fp8,63,0.01309866706530253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,float16,127,0.013514666507641474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,fp8,127,0.01310933381319046
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,fp8,1023,0.018863999595244724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,float16,255,0.013455999394257864
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,fp8,2047,0.027610667049884796
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,fp8,255,0.013066666821638743
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,float16,511,0.014949332922697067
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,fp8,511,0.014709333578745524
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,float16,1023,0.019205333044131596
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,float16,3,0.01498666654030482
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,float16,2047,0.039733332892258964
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,float16,4095,0.06128533184528351
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,4,128,0,1,float16,fp8,4095,0.04373333354791006
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,float16,1,0.014949332922697067
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,float16,15,0.015530666957298914
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,fp8,1,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,fp8,3,0.014954666296641031
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,float16,7,0.015333333363135656
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,fp8,7,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,fp8,15,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,float16,127,0.01525866612792015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,float16,31,0.01580799991885821
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,fp8,31,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,float16,63,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,float16,511,0.01922133316596349
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,fp8,63,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,fp8,127,0.014922666052977243
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,float16,255,0.015237333873907724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,fp8,255,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,fp8,511,0.018218666315078735
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,float16,1023,0.035487999518712364
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,fp8,1023,0.02325333406527837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,float16,2047,0.06028266747792562
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,fp8,2047,0.04109866668780645
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,float16,4095,0.10286399722099304
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,float16,1,0.01301866645614306
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,64,8,128,0,1,float16,fp8,4095,0.06192000210285187
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,fp8,1,0.008976000050703684
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,float16,3,0.012741333494583765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,fp8,3,0.009066666786869368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,float16,7,0.012975999464591345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,fp8,7,0.009109333157539368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,float16,15,0.013050666699806849
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,fp8,15,0.009205333267649015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,float16,31,0.014965333044528961
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,fp8,31,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,float16,63,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,fp8,63,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,float16,127,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,float16,1023,0.03344533344109853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,fp8,127,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,float16,2047,0.05406933526198069
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,float16,255,0.01722666621208191
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,fp8,255,0.01309866706530253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,float16,511,0.021722666919231415
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,fp8,511,0.019018666197856266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,float16,8191,0.1770026683807373
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,fp8,1023,0.02916266769170761
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,fp8,2047,0.04969066878159841
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,float16,4095,0.09476266304651897
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,fp8,4095,0.08865599830945332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,fp8,8191,0.1664959987004598
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,float16,16383,0.34219201405843097
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,fp8,16383,0.3220799962679545
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,float16,1,0.01312000056107839
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,fp8,1,0.009039999917149544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,float16,3,0.012778667112191519
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,fp8,3,0.008837333569924036
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,float16,32767,0.6708160241444906
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,float16,7,0.012821332861979803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,1,128,0,1,float16,fp8,32767,0.7296799818674723
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,fp8,7,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,float16,15,0.013061333447694778
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,float16,127,0.014954666296641031
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,fp8,15,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,float16,31,0.014901333798964819
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,fp8,31,0.01098666712641716
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,float16,63,0.014938666174809137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,fp8,63,0.012144000579913458
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,fp8,127,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,float16,255,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,float16,2047,0.054048001766204834
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,fp8,255,0.013045333325862885
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,float16,511,0.021216000119845074
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,fp8,4095,0.08888000249862671
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,fp8,511,0.019189332922299702
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,float16,1023,0.03332266708215078
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,fp8,8191,0.16639467080434164
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,fp8,1023,0.029557332396507263
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,fp8,2047,0.0480373352766037
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,float16,4095,0.09504533807436626
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,float16,8191,0.1768959959348043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,float16,16383,0.3428693215052287
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,fp8,16383,0.3222879966100057
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,float16,1,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,fp8,1,0.01089599976936976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,float16,3,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,fp8,3,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,float16,32767,0.6814080079396566
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,float16,7,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,fp8,7,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,2,128,0,1,float16,fp8,32767,0.7304480075836182
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,float16,15,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,fp8,15,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,float16,31,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,fp8,31,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,float16,63,0.010890666395425797
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,fp8,63,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,float16,127,0.010944000134865442
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,fp8,127,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,float16,255,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,fp8,255,0.010922666639089584
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,float16,2047,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,float16,511,0.011077333241701126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,fp8,511,0.011077333241701126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,float16,1023,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,fp8,1023,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,fp8,2047,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,float16,4095,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,fp8,4095,0.017162666966517765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,float16,8191,0.019365333020687103
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,fp8,8191,0.019280000279347103
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,float16,16383,0.023455999791622162
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,fp8,16383,0.0230880007147789
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,float16,32767,0.04074133435885111
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,float16,1,0.009392000113924345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,4,128,0,1,float16,fp8,32767,0.02736533433198929
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,fp8,1,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,float16,3,0.009999999776482582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,fp8,3,0.010992000500361124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,float16,7,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,fp8,7,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,float16,15,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,fp8,15,0.010608000059922537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,float16,31,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,fp8,31,0.010575999816258749
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,float16,63,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,fp8,63,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,float16,127,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,fp8,127,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,float16,255,0.010640000303586325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,fp8,255,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,float16,2047,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,fp8,2047,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,float16,511,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,fp8,511,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,float16,1023,0.012080000092585882
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,fp8,1023,0.010938666760921478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,float16,4095,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,fp8,4095,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,float16,8191,0.021295999487241108
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,fp8,8191,0.018863999595244724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,float16,16383,0.037685332198937736
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,128,0,1,float16,float16,3,0.09092799822489421
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,fp8,16383,0.025216000775496166
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,float16,32767,0.05931200087070465
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,128,0,1,float16,float16,1,0.08867733677228291
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,64,8,128,0,1,float16,fp8,32767,0.0394400010506312
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,128,0,1,float16,fp8,1,0.07423466444015503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,128,0,1,float16,fp8,3,0.07663999994595845
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,128,0,1,float16,float16,7,0.09259200096130371
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,128,0,1,float16,fp8,7,0.08065600196520488
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,128,0,1,float16,float16,15,0.11327999830245972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,128,0,1,float16,float16,127,0.13384000460306802
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,128,0,1,float16,fp8,15,0.0993333359559377
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,128,0,1,float16,float16,31,0.11270399888356526
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,128,0,1,float16,fp8,31,0.10014399886131287
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,128,0,1,float16,float16,63,0.11356266339619954
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,128,0,1,float16,fp8,63,0.10084799925486247
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,128,0,1,float16,float16,511,0.3365066846211751
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,128,0,1,float16,fp8,127,0.12165333827336629
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,128,0,1,float16,float16,255,0.2016693353652954
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,128,0,1,float16,fp8,255,0.1869386633237203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,128,0,1,float16,fp8,511,0.3165546655654907
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,128,0,1,float16,float16,1023,0.6090879837671915
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,128,0,1,float16,float16,1,0.0888426701227824
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,128,0,1,float16,fp8,1023,0.5761866569519043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,128,0,1,float16,fp8,1,0.07461333274841309
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,128,0,1,float16,float16,3,0.09091732899347942
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,128,0,1,float16,fp8,3,0.07646933197975159
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,128,0,1,float16,float16,15,0.1125440001487732
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,128,0,1,float16,fp8,15,0.09919466574986775
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,128,0,1,float16,float16,7,0.09275733431180318
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,128,0,1,float16,float16,2047,1.1669066747029622
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,1,128,0,1,float16,fp8,2047,1.0958773295084636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,128,0,1,float16,fp8,7,0.08064533273379008
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,128,0,1,float16,float16,31,0.11257599790891011
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,128,0,1,float16,fp8,31,0.09913067022959392
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,128,0,1,float16,float16,255,0.20136533180872598
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,128,0,1,float16,float16,63,0.11317333579063416
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,128,0,1,float16,fp8,63,0.10089066624641418
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,128,0,1,float16,float16,127,0.13366933663686117
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,128,0,1,float16,fp8,127,0.12166399757067363
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,128,0,1,float16,fp8,255,0.18690667549769083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,128,0,1,float16,float16,511,0.3352160056432088
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,128,0,1,float16,fp8,511,0.3165439963340759
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,128,0,1,float16,float16,1023,0.6203253269195557
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,128,0,1,float16,float16,1,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,128,0,1,float16,fp8,1023,0.5762933492660522
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,128,0,1,float16,fp8,1,0.017136000096797943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,128,0,1,float16,float16,3,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,128,0,1,float16,fp8,3,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,128,0,1,float16,float16,2047,1.178320010503133
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,128,0,1,float16,float16,31,0.01716800034046173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,128,0,1,float16,float16,7,0.017136000096797943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,2,128,0,1,float16,fp8,2047,1.11081067721049
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,128,0,1,float16,fp8,7,0.0170666662355264
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,128,0,1,float16,float16,127,0.017269333203633625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,128,0,1,float16,float16,15,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,128,0,1,float16,fp8,15,0.017024000485738117
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,128,0,1,float16,fp8,31,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,128,0,1,float16,float16,63,0.017349333812793095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,128,0,1,float16,fp8,63,0.01684800038735072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,128,0,1,float16,fp8,127,0.0170666662355264
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,128,0,1,float16,float16,1023,0.036981334288915
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,128,0,1,float16,float16,255,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,128,0,1,float16,fp8,255,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,128,0,1,float16,float16,2047,0.062368000547091164
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,128,0,1,float16,float16,511,0.021274665991465252
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,128,0,1,float16,fp8,511,0.020341333001852036
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,128,0,1,float16,fp8,1023,0.02741333345572154
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,4,128,0,1,float16,fp8,2047,0.04524266719818115
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,128,0,1,float16,float16,1,0.020874666670958202
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,128,0,1,float16,fp8,7,0.019082666685183842
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,128,0,1,float16,fp8,1,0.018986667195955913
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,128,0,1,float16,float16,3,0.021029333273569744
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,128,0,1,float16,fp8,3,0.018922666708628338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,128,0,1,float16,float16,7,0.02094399929046631
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,128,0,1,float16,float16,15,0.02229333420594533
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,128,0,1,float16,fp8,15,0.018810667097568512
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,128,0,1,float16,float16,127,0.021082667013009388
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,128,0,1,float16,float16,31,0.021066665649414062
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,128,0,1,float16,fp8,31,0.019023999571800232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,128,0,1,float16,float16,63,0.021114667256673176
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,128,0,1,float16,fp8,63,0.018901333212852478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,128,0,1,float16,fp8,127,0.019023999571800232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,128,0,1,float16,float16,255,0.020938667158285778
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,128,0,1,float16,fp8,255,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,128,0,1,float16,float16,511,0.035205334424972534
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,128,0,1,float16,fp8,511,0.02497066557407379
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,128,0,1,float16,float16,1023,0.05494399865468343
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,128,0,1,float16,fp8,1023,0.03700799991687139
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,128,0,1,float16,float16,2047,0.09673066933949788
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,64,8,128,0,1,float16,fp8,2047,0.05974400043487549
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,128,0,1,float16,float16,1,0.17052799463272095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,128,0,1,float16,fp8,7,0.15445866187413534
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,128,0,1,float16,fp8,1,0.1418773333231608
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,128,0,1,float16,fp8,3,0.14704533418019614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,128,0,1,float16,float16,3,0.17676266034444174
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,128,0,1,float16,float16,31,0.22000000874201456
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,128,0,1,float16,float16,7,0.17904533942540488
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,128,0,1,float16,float16,15,0.21972266832987467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,128,0,1,float16,fp8,15,0.19310933351516724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,128,0,1,float16,float16,127,0.2606400052706401
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,128,0,1,float16,fp8,31,0.1933280030886332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,128,0,1,float16,float16,63,0.22131200631459555
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,128,0,1,float16,fp8,63,0.19352000951766968
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,128,0,1,float16,fp8,127,0.23817066351572672
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,128,0,1,float16,float16,1,0.17046932379404703
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,128,0,1,float16,float16,255,0.3943093220392863
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,128,0,1,float16,fp8,255,0.3656586805979411
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,128,0,1,float16,fp8,1,0.14201600352923074
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,128,0,1,float16,float16,511,0.6599839925765991
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,1,128,0,1,float16,fp8,511,0.6225813229878744
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,128,0,1,float16,float16,3,0.17682133118311563
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,128,0,1,float16,fp8,3,0.1479200025399526
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,128,0,1,float16,float16,7,0.17947200934092203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,128,0,1,float16,fp8,7,0.15441067020098367
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,128,0,1,float16,float16,15,0.21987199783325195
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,128,0,1,float16,fp8,15,0.19292799631754556
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,128,0,1,float16,float16,31,0.2196000019709269
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,128,0,1,float16,fp8,31,0.19337600469589233
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,128,0,1,float16,float16,63,0.22107734282811484
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,128,0,1,float16,fp8,63,0.1950933337211609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,128,0,1,float16,float16,127,0.2607040007909139
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,128,0,1,float16,fp8,127,0.23678400119145712
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,128,0,1,float16,float16,255,0.3944586515426636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,128,0,1,float16,float16,511,0.6766026814778646
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,128,0,1,float16,fp8,255,0.36531198024749756
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,128,0,1,float16,float16,1,0.02499733368555705
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,128,0,1,float16,fp8,1,0.02314666658639908
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,128,0,1,float16,float16,3,0.02610666553179423
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,2,128,0,1,float16,fp8,511,0.6223946809768677
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,128,0,1,float16,fp8,3,0.02306666721900304
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,128,0,1,float16,float16,7,0.025199999411900837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,128,0,1,float16,fp8,7,0.023285334308942158
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,128,0,1,float16,float16,15,0.02515200028816859
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,128,0,1,float16,fp8,15,0.023045333723227184
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,128,0,1,float16,float16,31,0.025008000433444977
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,128,0,1,float16,fp8,31,0.0233599990606308
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,128,0,1,float16,float16,255,0.025263999899228413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,128,0,1,float16,float16,63,0.025349333882331848
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,128,0,1,float16,fp8,63,0.023002666731675465
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,128,0,1,float16,float16,127,0.025231999655564625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,128,0,1,float16,fp8,127,0.023333333432674408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,128,0,1,float16,fp8,255,0.023215999205907185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,128,0,1,float16,float16,511,0.03804266701141993
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,4,128,0,1,float16,fp8,511,0.030426666140556335
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,128,0,1,float16,float16,1,0.03320533285538355
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,128,0,1,float16,fp8,1,0.029232000311215717
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,128,0,1,float16,float16,3,0.03347733368476232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,128,0,1,float16,fp8,3,0.029232000311215717
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,128,0,1,float16,float16,7,0.03150933235883713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,128,0,1,float16,fp8,7,0.029546665648619335
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,128,0,1,float16,float16,15,0.03160000095764796
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,128,0,1,float16,fp8,15,0.029279999434947968
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,128,0,1,float16,float16,127,0.031770666440327965
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,128,0,1,float16,float16,31,0.03329066683848699
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,128,0,1,float16,fp8,31,0.029322666426499683
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,128,0,1,float16,float16,63,0.033258666594823204
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,128,0,1,float16,fp8,63,0.029493334392706554
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,128,0,1,float16,fp8,127,0.029472000896930695
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,128,0,1,float16,fp8,511,0.04192533095677694
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,128,0,1,float16,float16,255,0.03691199918588003
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,128,0,1,float16,fp8,255,0.02940800040960312
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,64,8,128,0,1,float16,float16,511,0.05784533421198527
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,1,128,0,1,float16,float16,1,0.33269333839416504
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,1,128,0,1,float16,fp8,1,0.277349332968394
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,1,128,0,1,float16,float16,3,0.34882132212320965
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,1,128,0,1,float16,fp8,3,0.28763200839360553
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,1,128,0,1,float16,float16,7,0.3547626733779907
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,1,128,0,1,float16,fp8,7,0.30185600121816
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,1,128,0,1,float16,float16,15,0.4329386552174886
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,1,128,0,1,float16,fp8,15,0.37941332658131915
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,1,128,0,1,float16,float16,31,0.43299734592437744
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,1,128,0,1,float16,fp8,31,0.3800479968388875
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,1,128,0,1,float16,float16,63,0.4367893139521281
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,1,128,0,1,float16,fp8,63,0.3817280133565267
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,1,128,0,1,float16,float16,127,0.5151306788126627
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,1,128,0,1,float16,fp8,127,0.4673493305842082
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,2,128,0,1,float16,float16,1,0.3326293428738912
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,2,128,0,1,float16,fp8,1,0.277402659257253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,1,128,0,1,float16,float16,255,0.7826933066050211
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,2,128,0,1,float16,float16,3,0.34881067276000977
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,1,128,0,1,float16,fp8,255,0.7233813603719076
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,2,128,0,1,float16,fp8,3,0.28837867577870685
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,2,128,0,1,float16,float16,7,0.3546559810638428
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,2,128,0,1,float16,fp8,7,0.30288533369700116
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,2,128,0,1,float16,float16,15,0.43301331996917725
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,2,128,0,1,float16,fp8,15,0.3782186508178711
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,2,128,0,1,float16,float16,31,0.4330293337504069
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,2,128,0,1,float16,fp8,31,0.379744013150533
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,2,128,0,1,float16,float16,63,0.43667201201121014
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,2,128,0,1,float16,fp8,63,0.38130664825439453
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,2,128,0,1,float16,float16,127,0.5159840186436971
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,4,128,0,1,float16,float16,1,0.039461334546407066
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,2,128,0,1,float16,fp8,127,0.46775468190511066
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,4,128,0,1,float16,fp8,1,0.03749333322048187
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,4,128,0,1,float16,float16,3,0.03937600056330363
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,4,128,0,1,float16,fp8,3,0.03756266583998998
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,2,128,0,1,float16,float16,255,0.8099199930826823
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,2,128,0,1,float16,fp8,255,0.7263360023498535
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,4,128,0,1,float16,float16,7,0.039434666434923805
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,4,128,0,1,float16,fp8,7,0.03748800108830134
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,4,128,0,1,float16,float16,15,0.039647998909155525
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,4,128,0,1,float16,fp8,15,0.0376800000667572
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,4,128,0,1,float16,fp8,63,0.037674665451049805
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,4,128,0,1,float16,float16,127,0.03984000037113825
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,4,128,0,1,float16,float16,31,0.03959999978542328
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,4,128,0,1,float16,fp8,31,0.03765333443880081
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,4,128,0,1,float16,float16,63,0.03955200066169103
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,4,128,0,1,float16,fp8,127,0.037717332442601524
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,4,128,0,1,float16,float16,255,0.04348800083001455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,4,128,0,1,float16,fp8,255,0.03875199953715006
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,8,128,0,1,float16,float16,1,0.056159997979799904
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,8,128,0,1,float16,fp8,1,0.04974933465321859
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,8,128,0,1,float16,float16,3,0.056032001972198486
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,8,128,0,1,float16,fp8,3,0.04971733192602793
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,8,128,0,1,float16,float16,7,0.055760001142819725
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,8,128,0,1,float16,fp8,7,0.04975466430187225
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,8,128,0,1,float16,float16,15,0.05609600245952606
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,8,128,0,1,float16,fp8,15,0.049653331438700356
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,8,128,0,1,float16,float16,31,0.05607999861240387
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,8,128,0,1,float16,fp8,31,0.04970133304595947
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,8,128,0,1,float16,fp8,127,0.05037866532802582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,float16,1,0.012800000607967377
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,8,128,0,1,float16,float16,63,0.055861334005991616
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,8,128,0,1,float16,fp8,63,0.04974933465321859
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,8,128,0,1,float16,float16,127,0.05635199944178263
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,8,128,0,1,float16,float16,255,0.06224533418814341
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,fp8,1,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,64,8,128,0,1,float16,fp8,255,0.052095999320348106
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,float16,3,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,fp8,3,0.014794666320085526
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,float16,7,0.012800000607967377
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,fp8,7,0.014815999815861383
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,float16,15,0.013023999830087027
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,fp8,15,0.015247999380032221
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,float16,31,0.01303999995191892
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,fp8,31,0.01931200052301089
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,float16,63,0.013178666432698568
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,fp8,63,0.019306667149066925
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,float16,127,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,fp8,127,0.019205333044131596
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,float16,255,0.020010666300853092
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,fp8,255,0.0233599990606308
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,float16,511,0.0314026673634847
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,fp8,511,0.03324266771475474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,float16,1023,0.05215999980767568
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,fp8,1023,0.05213333169619242
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,float16,2047,0.09473599990208943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,fp8,2047,0.0906826655069987
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,float16,4095,0.17835734287897745
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,fp8,4095,0.16647467017173767
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,float16,8191,0.3469333251317342
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,fp8,8191,0.3197173277537028
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,float16,1,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,float16,16383,0.6970453262329102
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,fp8,16383,0.6250293254852295
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,fp8,1,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,float16,3,0.012890666723251343
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,fp8,3,0.014767999450365702
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,float16,7,0.012773333738247553
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,fp8,7,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,float16,15,0.013104000439246496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,fp8,15,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,fp8,32767,1.4485012690226238
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,float16,31,0.013050666699806849
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,fp8,31,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,1,128,0,1,float16,float16,32767,1.9778186480204265
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,float16,63,0.01322666679819425
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,fp8,63,0.01907733331123988
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,float16,127,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,fp8,127,0.019274666905403137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,float16,255,0.02094399929046631
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,fp8,255,0.022970666488011677
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,float16,511,0.03143999973932902
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,fp8,2047,0.0906826655069987
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,fp8,511,0.03328000009059906
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,float16,1023,0.05184000233809153
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,fp8,1023,0.052101333936055504
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,float16,2047,0.09353599945704143
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,float16,4095,0.1797920068105062
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,fp8,4095,0.16672533750534058
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,float16,8191,0.3506186803181966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,fp8,8191,0.3200906713803609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,float16,1,0.010591999938090643
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,float16,16383,0.7112800280253092
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,fp8,1,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,fp8,16383,0.625055988629659
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,float16,3,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,fp8,3,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,float16,7,0.010650667051474253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,fp8,7,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,fp8,31,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,float16,15,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,fp8,15,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,float16,31,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,float16,63,0.01089599976936976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,fp8,63,0.011055999745925268
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,fp8,32767,1.5223414103190105
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,float16,511,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,float16,127,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,fp8,127,0.011066666493813196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,2,128,0,1,float16,float16,32767,2.4921813011169434
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,float16,2047,0.017136000096797943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,float16,255,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,fp8,255,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,fp8,511,0.01108266661564509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,float16,8191,0.022111999491850536
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,float16,1023,0.012944000462690989
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,fp8,1023,0.012426666915416718
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,fp8,2047,0.01492799942692121
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,float16,4095,0.018976000448067982
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,fp8,4095,0.017231999586025875
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,fp8,8191,0.019685332973798115
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,float16,16383,0.039434666434923805
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,fp8,16383,0.0271573339899381
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,float16,1,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,fp8,1,0.010581333190202713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,fp8,7,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,float16,32767,0.06025599936644236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,float16,3,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,4,128,0,1,float16,fp8,32767,0.04359466830889384
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,fp8,3,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,float16,7,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,float16,15,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,fp8,15,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,float16,31,0.01032533310353756
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,fp8,31,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,float16,63,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,fp8,63,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,float16,127,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,fp8,127,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,float16,255,0.010538666198650995
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,float16,2047,0.01695999999841054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,fp8,2047,0.01681600014368693
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,fp8,255,0.010911999891201654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,float16,511,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,fp8,511,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,float16,1023,0.01301866645614306
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,fp8,1023,0.013114667187134424
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,float16,16383,0.058677335580190025
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,float16,4095,0.02103466788927714
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,fp8,4095,0.019018666197856266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,float16,8191,0.03758399933576584
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,fp8,8191,0.02346133440732956
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,fp8,16383,0.03976000100374222
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,float16,32767,0.10079999764760335
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,64,8,128,0,1,float16,fp8,32767,0.06165333092212677
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,1,128,0,1,float16,float16,1,0.6596266825993856
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,1,128,0,1,float16,fp8,1,0.5474613507588705
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,1,128,0,1,float16,float16,3,0.693066676457723
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,1,128,0,1,float16,fp8,3,0.5696906646092733
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,1,128,0,1,float16,fp8,7,0.5988266468048096
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,1,128,0,1,float16,float16,7,0.7043626308441162
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,1,128,0,1,float16,float16,15,0.859989325205485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,1,128,0,1,float16,fp8,15,0.7505973180135092
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,1,128,0,1,float16,float16,31,0.8613333702087402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,1,128,0,1,float16,fp8,31,0.7539520263671875
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,1,128,0,1,float16,float16,63,0.8675519625345866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,1,128,0,1,float16,fp8,63,0.757034699122111
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,1,128,0,1,float16,float16,127,1.032490650812785
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,2,128,0,1,float16,float16,1,0.6582186619440714
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,1,128,0,1,float16,fp8,127,0.9289920330047607
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,2,128,0,1,float16,fp8,1,0.5493760108947754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,2,128,0,1,float16,float16,3,0.6930879751841227
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,2,128,0,1,float16,fp8,3,0.5705706675847372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,2,128,0,1,float16,float16,7,0.7046933174133301
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,2,128,0,1,float16,fp8,7,0.5988800128300985
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,2,128,0,1,float16,float16,15,0.8608106772104899
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,2,128,0,1,float16,fp8,15,0.7518239816029867
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,2,128,0,1,float16,float16,31,0.8625813325246176
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,2,128,0,1,float16,fp8,31,0.7542933622996012
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,2,128,0,1,float16,float16,63,0.8709440231323242
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,2,128,0,1,float16,fp8,63,0.7580479780832926
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,4,128,0,1,float16,float16,1,0.0699786643187205
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,4,128,0,1,float16,fp8,1,0.06482133269309998
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,4,128,0,1,float16,float16,3,0.07050133248170216
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,2,128,0,1,float16,float16,127,1.0701440175374348
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,4,128,0,1,float16,fp8,3,0.06401066482067108
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,2,128,0,1,float16,fp8,127,0.9312853018442789
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,4,128,0,1,float16,float16,7,0.07019733389218648
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,4,128,0,1,float16,fp8,7,0.06396799782911937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,4,128,0,1,float16,float16,15,0.07051200171311696
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,4,128,0,1,float16,fp8,15,0.06461866696675618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,4,128,0,1,float16,float16,31,0.07020266850789388
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,4,128,0,1,float16,fp8,31,0.06480533381303151
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,4,128,0,1,float16,float16,63,0.07037866612275441
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,4,128,0,1,float16,fp8,63,0.06400000055631001
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,4,128,0,1,float16,float16,127,0.07006399830182393
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,4,128,0,1,float16,fp8,127,0.06605866551399231
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,8,128,0,1,float16,float16,1,0.10198932886123657
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,8,128,0,1,float16,fp8,1,0.08896533648173015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,8,128,0,1,float16,float16,3,0.10148800412813823
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,8,128,0,1,float16,fp8,3,0.09000000357627869
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,8,128,0,1,float16,float16,7,0.10109866658846538
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,8,128,0,1,float16,fp8,7,0.08969066540400188
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,8,128,0,1,float16,float16,15,0.102101335922877
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,8,128,0,1,float16,fp8,15,0.08890666564305623
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,8,128,0,1,float16,float16,31,0.10219200452168782
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,8,128,0,1,float16,fp8,31,0.08884800473848979
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,8,128,0,1,float16,float16,63,0.10221866766611735
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,8,128,0,1,float16,fp8,63,0.08992532889048259
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,8,128,0,1,float16,float16,127,0.10073066751162212
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,64,8,128,0,1,float16,fp8,127,0.09075733025868733
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,1,128,0,1,float16,fp8,1,1.0890133380889893
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,1,128,0,1,float16,float16,1,1.3161973158518474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,1,128,0,1,float16,fp8,3,1.1374239921569824
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,1,128,0,1,float16,float16,3,1.385109265645345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,1,128,0,1,float16,fp8,7,1.199280023574829
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,1,128,0,1,float16,float16,7,1.409663995107015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,1,128,0,1,float16,fp8,15,1.501050631205241
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,1,128,0,1,float16,float16,15,1.7217440605163574
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,1,128,0,1,float16,float16,31,1.7370719909667969
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,1,128,0,1,float16,fp8,31,1.5134240786234539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,2,128,0,1,float16,fp8,1,1.094714641571045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,2,128,0,1,float16,float16,1,1.319648027420044
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,1,128,0,1,float16,fp8,63,1.5393813451131184
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,1,128,0,1,float16,float16,63,1.7733599344889324
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,2,128,0,1,float16,float16,3,1.3907999992370605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,2,128,0,1,float16,fp8,3,1.1422346433003743
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,2,128,0,1,float16,float16,7,1.4303146998087566
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,2,128,0,1,float16,fp8,7,1.2035306294759114
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,2,128,0,1,float16,fp8,15,1.5084320704142253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,2,128,0,1,float16,float16,15,1.7421387036641438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,2,128,0,1,float16,float16,31,1.7674773534138997
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,2,128,0,1,float16,fp8,31,1.5355092684427898
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,2,128,0,1,float16,float16,63,1.7875733375549316
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,2,128,0,1,float16,fp8,63,1.570031960805257
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,4,128,0,1,float16,float16,1,0.1292800009250641
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,4,128,0,1,float16,fp8,1,0.1202239990234375
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,4,128,0,1,float16,float16,3,0.12948266665140787
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,4,128,0,1,float16,fp8,3,0.12020799517631531
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,4,128,0,1,float16,float16,7,0.1285599966843923
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,4,128,0,1,float16,fp8,7,0.11932800213495891
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,4,128,0,1,float16,float16,15,0.1291146675745646
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,4,128,0,1,float16,fp8,15,0.12103999654452006
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,4,128,0,1,float16,float16,31,0.12894933422406515
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,4,128,0,1,float16,fp8,31,0.12132799625396729
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,4,128,0,1,float16,float16,63,0.1295413374900818
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,4,128,0,1,float16,fp8,63,0.12073066830635071
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,8,128,0,1,float16,float16,1,0.19090133905410767
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,8,128,0,1,float16,fp8,1,0.16779732704162598
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,8,128,0,1,float16,float16,3,0.19106133778889975
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,8,128,0,1,float16,fp8,3,0.16844799121220908
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,8,128,0,1,float16,float16,7,0.19097065925598145
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,8,128,0,1,float16,fp8,7,0.16840000947316489
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,float16,1,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,fp8,1,0.014794666320085526
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,float16,3,0.017194667210181553
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,8,128,0,1,float16,float16,15,0.19124799966812134
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,fp8,3,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,8,128,0,1,float16,fp8,15,0.1684373418490092
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,float16,7,0.017301333447297413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,8,128,0,1,float16,float16,31,0.1909866730372111
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,8,128,0,1,float16,fp8,31,0.1673813263575236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,fp8,7,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,float16,15,0.019109333554903667
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,8,128,0,1,float16,float16,63,0.1909546653429667
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,fp8,15,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,64,8,128,0,1,float16,fp8,63,0.16846400499343872
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,float16,127,0.021082667013009388
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,float16,31,0.01899733394384384
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,fp8,31,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,float16,63,0.019002666076024372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,fp8,63,0.017162666966517765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,fp8,127,0.02070933332045873
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,float16,255,0.031152000029881794
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,fp8,255,0.029317334294319153
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,float16,511,0.04975999891757965
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,fp8,511,0.04764266808827718
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,float16,1023,0.08458133538564046
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,float16,4095,0.2974239985148112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,fp8,1023,0.0810346653064092
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,float16,2047,0.15451733271280924
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,fp8,2047,0.15030399958292642
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,fp8,4095,0.28939199447631836
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,float16,8191,0.583903988202413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,float16,1,0.01701333373785019
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,fp8,8191,0.5666933457056681
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,fp8,1,0.014837333311637243
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,float16,3,0.017279999951521557
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,float16,16383,1.2578186988830566
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,fp8,3,0.015034666905800501
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,float16,7,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,fp8,7,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,float16,15,0.018794666975736618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,fp8,15,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,1,128,0,1,float16,fp8,16383,1.3699040412902832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,float16,31,0.01918399954835574
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,fp8,31,0.017194667210181553
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,float16,63,0.018981333822011948
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,fp8,63,0.01718933383623759
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,float16,127,0.021295999487241108
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,fp8,127,0.02109866589307785
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,float16,255,0.03152533372243246
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,fp8,255,0.029466666281223297
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,float16,511,0.04975999891757965
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,fp8,511,0.045893331368764244
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,float16,1023,0.08448533217112224
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,fp8,1023,0.08111466467380524
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,float16,2047,0.15416000286738077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,fp8,2047,0.14998400211334229
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,float16,4095,0.29974399010340375
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,fp8,4095,0.2892000079154968
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,float16,8191,0.5889813502629598
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,float16,1,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,fp8,8191,0.5664426485697428
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,fp8,1,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,float16,3,0.010949333508809408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,fp8,3,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,float16,7,0.010981333752473196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,fp8,16383,1.5216372807820637
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,fp8,7,0.010650667051474253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,float16,15,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,2,128,0,1,float16,float16,16383,1.4120747248331706
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,fp8,15,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,float16,31,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,fp8,31,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,float16,63,0.010650667051474253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,fp8,63,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,float16,127,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,fp8,127,0.010981333752473196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,float16,255,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,fp8,255,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,fp8,2047,0.01718933383623759
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,float16,511,0.011178666104873022
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,fp8,511,0.012298667182525
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,float16,1023,0.01313599944114685
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,fp8,1023,0.013066666821638743
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,float16,2047,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,float16,4095,0.021322667598724365
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,fp8,4095,0.021018666525681812
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,float16,8191,0.03938133269548416
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,fp8,8191,0.027130665878454845
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,float16,16383,0.06025066475073496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,float16,1,0.01108266661564509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,fp8,7,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,4,128,0,1,float16,fp8,16383,0.043653334180514015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,fp8,1,0.01062400018175443
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,float16,3,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,fp8,31,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,fp8,3,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,float16,7,0.011146667102972666
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,float16,15,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,fp8,15,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,float16,31,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,float16,63,0.011109333485364914
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,fp8,63,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,float16,127,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,fp8,127,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,float16,255,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,fp8,255,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,float16,511,0.012826666235923767
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,fp8,511,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,float16,1023,0.013381333400805792
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,fp8,1023,0.01314666618903478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,float16,2047,0.01923199991385142
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,fp8,2047,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,float16,4095,0.03740799923737844
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,fp8,4095,0.02316266546646754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,float16,8191,0.0580266664425532
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,fp8,8191,0.03750933210055033
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,float16,16383,0.0997759997844696
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,float16,1,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,64,8,128,0,1,float16,fp8,16383,0.059562668204307556
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,float16,1,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,fp8,1,0.014794666320085526
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,fp8,1,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,float16,3,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,float16,7,0.018101333330074947
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,float16,3,0.01710933322707812
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,fp8,3,0.014922666052977243
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,fp8,3,0.014874666929244995
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,float16,7,0.017279999951521557
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,fp8,7,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,fp8,7,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,float16,31,0.019189332922299702
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,float16,15,0.017509333789348602
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,float16,15,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,fp8,15,0.015029333531856537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,fp8,15,0.014783999572197596
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,float16,31,0.019280000279347103
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,fp8,31,0.017103999853134155
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,fp8,31,0.01706133286158244
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,float16,63,0.019109333554903667
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,float16,63,0.019141333798567455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,fp8,63,0.017242666333913803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,float16,255,0.01926400015751521
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,fp8,63,0.017194667210181553
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,float16,127,0.01929066702723503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,float16,127,0.019130667050679524
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,fp8,127,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,fp8,127,0.018330667167901993
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,float16,255,0.02325333406527837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,fp8,511,0.029466666281223297
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,fp8,255,0.01729600007335345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,fp8,255,0.021210665504137676
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,float16,511,0.019930666933457058
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,float16,511,0.03162133445342382
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,fp8,511,0.01722666621208191
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,float16,1023,0.019386666516462963
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,float16,1023,0.04965866605440775
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,fp8,1023,0.017024000485738117
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,fp8,1023,0.0476693312327067
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,float16,2047,0.019173332800467808
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,float16,2047,0.08479467034339905
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,fp8,2047,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,fp8,4095,0.15241600076357523
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,fp8,2047,0.0827466646830241
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,float16,4095,0.019215999792019527
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,fp8,8191,0.01722666621208191
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,float16,8191,0.2977280020713806
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,float16,4095,0.15449600418408713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,fp8,4095,0.01729600007335345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,float16,8191,0.01926400015751521
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,float16,1,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,fp8,8191,0.2950719992319743
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,float16,16383,0.01924266666173935
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,64,128,1,float16,fp8,16383,0.01739199956258138
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,float16,1,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,float16,16383,0.6615893443425497
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,fp8,1,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,fp8,1,0.012533333152532578
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,float16,7,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,64,0,1,float16,fp8,16383,0.6219786802927653
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,float16,3,0.01089599976936976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,float16,3,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,fp8,3,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,fp8,3,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,float16,7,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,fp8,7,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,fp8,7,0.010650667051474253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,float16,31,0.009952000031868616
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,float16,15,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,float16,15,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,fp8,15,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,fp8,15,0.010928000013033548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,float16,31,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,fp8,31,0.011125333607196808
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,fp8,31,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,float16,63,0.010885333021481832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,float16,63,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,fp8,63,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,fp8,63,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,float16,127,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,fp8,255,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,float16,127,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,float16,511,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,fp8,127,0.011087999989589056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,fp8,127,0.010650667051474253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,float16,255,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,float16,255,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,fp8,255,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,float16,511,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,fp8,511,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,fp8,511,0.01101333275437355
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,float16,1023,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,float16,1023,0.012645332763592402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,fp8,1023,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,fp8,1023,0.013002666334311167
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,float16,2047,0.012831999609867731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,float16,2047,0.016506666938463848
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,fp8,2047,0.012960000584522883
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,fp8,2047,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,float16,4095,0.012800000607967377
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,float16,8191,0.012794667234023413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,float16,4095,0.019194666296243668
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,fp8,4095,0.013167999684810638
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,fp8,4095,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,float16,8191,0.02739199995994568
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,fp8,8191,0.013141332815090815
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,fp8,16383,0.013007999708255133
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,float16,16383,0.045082668463389076
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,fp8,8191,0.02514133354028066
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,64,128,1,float16,float16,16383,0.012842666357755661
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,float16,1,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,64,0,1,float16,fp8,16383,0.03738133360942205
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,float16,1,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,fp8,1,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,fp8,1,0.010933333386977514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,float16,3,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,float16,3,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,fp8,7,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,fp8,3,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,fp8,3,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,float16,7,0.01091733326514562
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,float16,7,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,fp8,7,0.010970667004585266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,float16,15,0.010575999816258749
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,float16,15,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,fp8,15,0.01098666712641716
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,fp8,15,0.01097600037852923
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,float16,31,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,float16,31,0.011018666128317514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,fp8,31,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,fp8,31,0.01097600037852923
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,float16,63,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,float16,63,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,fp8,63,0.010970667004585266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,fp8,63,0.010949333508809408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,float16,127,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,fp8,255,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,float16,127,0.010629333555698395
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,fp8,127,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,fp8,127,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,float16,255,0.010890666395425797
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,float16,255,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,float16,1023,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,fp8,255,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,float16,511,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,float16,511,0.011141333729028702
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,fp8,511,0.010970667004585266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,fp8,511,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,float16,1023,0.012815999488035837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,fp8,1023,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,fp8,1023,0.012789333860079447
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,float16,2047,0.013125333935022354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,float16,2047,0.01798933371901512
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,fp8,2047,0.012885333349307379
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,fp8,2047,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,float16,4095,0.012789333860079447
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,float16,4095,0.02364266663789749
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,fp8,8191,0.013114667187134424
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,fp8,4095,0.013194666554530462
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,fp8,4095,0.021162666380405426
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,float16,8191,0.01313599944114685
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,float16,8191,0.039488000174363456
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,fp8,8191,0.031199999153614044
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,float16,1,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,float16,16383,0.012746666868527731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,float16,1,0.011658667276302973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,float16,16383,0.06044800082842509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,float16,3,0.012063999970753988
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,64,128,1,float16,fp8,16383,0.013045333325862885
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,64,0,1,float16,fp8,16383,0.05422399938106537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,fp8,1,0.012629333883523941
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,fp8,1,0.011226666470368704
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,float16,3,0.01109333336353302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,fp8,3,0.011722666521867117
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,fp8,3,0.01108266661564509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,float16,7,0.012725333372751871
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,float16,7,0.012800000607967377
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,fp8,7,0.011503999431928
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,fp8,7,0.0120319997270902
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,float16,15,0.011109333485364914
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,float16,15,0.011087999989589056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,fp8,31,0.012869333227475485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,float16,63,0.011071999867757162
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,fp8,15,0.01180800050497055
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,fp8,15,0.01097600037852923
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,float16,31,0.012426666915416718
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,float16,31,0.012725333372751871
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,fp8,31,0.01173866664369901
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,float16,63,0.011071999867757162
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,fp8,63,0.012986666212479273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,fp8,63,0.011055999745925268
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,float16,255,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,float16,127,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,float16,127,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,fp8,127,0.012778667112191519
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,float16,511,0.011866666376590729
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,fp8,127,0.012741333494583765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,float16,255,0.012106666962305704
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,fp8,255,0.012960000584522883
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,fp8,255,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,float16,511,0.012975999464591345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,fp8,511,0.012879999975363413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,fp8,511,0.013093333691358566
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,float16,1023,0.011850666254758835
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,float16,1023,0.016229332735141117
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,fp8,1023,0.012965332716703415
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,fp8,1023,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,float16,2047,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,float16,2047,0.025093334416548412
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,fp8,2047,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,fp8,2047,0.023120000958442688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,float16,4095,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,float16,4095,0.039690665900707245
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,fp8,4095,0.01482133318980535
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,fp8,4095,0.03320533285538355
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,float16,8191,0.014778666198253632
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,float16,8191,0.06241066753864288
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,fp8,8191,0.014805333067973455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,float16,16383,0.10699733098347981
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,fp8,8191,0.05450133482615153
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,float16,16383,0.014858666807413101
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,64,128,1,float16,fp8,16383,0.014901333798964819
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,float16,1,0.008933333059151968
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,float16,1,0.008639999975760778
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,64,0,1,float16,fp8,16383,0.09261866410573323
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,fp8,1,0.009125333279371262
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,fp8,1,0.00914666677514712
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,float16,3,0.00873066671192646
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,float16,3,0.008789333204428354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,fp8,3,0.009002666920423508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,fp8,3,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,float16,7,0.008752000207702318
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,fp8,15,0.00891733355820179
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,float16,7,0.00878399983048439
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,fp8,7,0.009136000027259191
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,fp8,7,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,float16,15,0.008698666468262672
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,float16,15,0.008805333326260248
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,fp8,15,0.009136000027259191
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,float16,31,0.008890666688481966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,float16,31,0.00878399983048439
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,fp8,31,0.009296000003814697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,float16,127,0.009232000137368837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,fp8,31,0.009061333412925402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,float16,63,0.008933333059151968
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,float16,63,0.008922666932145754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,fp8,63,0.010922666639089584
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,fp8,63,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,float16,127,0.009077333534757296
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,fp8,127,0.012837332983811697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,fp8,127,0.013034666577974955
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,float16,255,0.009103999783595404
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,float16,255,0.009072000160813332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,fp8,255,0.012815999488035837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,fp8,255,0.010981333752473196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,float16,511,0.012762666990359625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,float16,511,0.013157332936922709
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,fp8,511,0.0129120002190272
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,float16,2047,0.01716800034046173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,fp8,511,0.012853333105643591
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,float16,1023,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,float16,1023,0.013199999928474426
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,fp8,1023,0.015013333410024643
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,fp8,1023,0.017093333105246227
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,float16,2047,0.021253332495689392
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,fp8,2047,0.014773332824309668
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,fp8,2047,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,float16,4095,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,float16,4095,0.027109332382678986
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,fp8,4095,0.02103466788927714
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,fp8,4095,0.029189333319664
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,float16,8191,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,float16,8191,0.03732266773780187
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,fp8,8191,0.021018666525681812
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,fp8,8191,0.03770666569471359
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,float16,16383,0.017136000096797943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,float16,16383,0.05418133238951365
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,fp8,16383,0.021322667598724365
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,fp8,16383,0.05410666763782501
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,float16,32767,0.017082666357358296
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,fp8,32767,0.021205333371957142
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,float16,32767,0.09076266487439473
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,fp8,32767,0.08455999692281087
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,float16,1,0.00895999992887179
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,float16,1,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,float16,65535,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,float16,3,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,fp8,1,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,fp8,1,0.01091733326514562
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,float16,65535,0.166512002547582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,float16,3,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,64,128,1,float16,fp8,65535,0.02109333376089732
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,64,0,1,float16,fp8,65535,0.14411200086275736
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,fp8,3,0.011109333485364914
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,fp8,3,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,float16,7,0.009301333377758661
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,float16,7,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,fp8,7,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,fp8,7,0.010122666756312052
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,float16,15,0.008965333302815756
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,float16,31,0.009082666908701261
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,fp8,31,0.010378666842977205
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,float16,15,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,fp8,15,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,fp8,15,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,float16,31,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,fp8,31,0.009093333035707474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,float16,63,0.008746666833758354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,float16,63,0.010538666198650995
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,fp8,63,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,fp8,63,0.009952000031868616
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,float16,127,0.009935999910036722
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,float16,127,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,fp8,127,0.009322666873534521
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,fp8,127,0.009663999701539675
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,float16,255,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,float16,255,0.010885333021481832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,fp8,255,0.00956266683836778
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,fp8,255,0.010650667051474253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,float16,511,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,float16,511,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,fp8,511,0.010410666465759277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,fp8,511,0.01101333275437355
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,float16,2047,0.009183999771873156
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,float16,1023,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,float16,1023,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,fp8,1023,0.011066666493813196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,fp8,1023,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,float16,2047,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,fp8,2047,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,float16,8191,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,fp8,2047,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,float16,4095,0.009098666409651438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,float16,4095,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,fp8,4095,0.010938666760921478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,fp8,4095,0.012847999731699625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,float16,8191,0.015002666662136713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,fp8,8191,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,fp8,16383,0.01966933285196622
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,fp8,8191,0.014901333798964819
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,float16,16383,0.00922133338948091
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,float16,16383,0.018901333212852478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,fp8,16383,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,float16,32767,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,float16,32767,0.021359999974568684
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,fp8,32767,0.022442666192849476
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,fp8,32767,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,float16,1,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,float16,1,0.008858666444818178
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,float16,65535,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,fp8,1,0.0099093330403169
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,fp8,65535,0.02306666721900304
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,64,0,1,float16,float16,65535,0.02476799984773
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,fp8,1,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,64,128,1,float16,fp8,65535,0.009679999823371569
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,float16,3,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,float16,3,0.009775999933481216
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,fp8,3,0.010640000303586325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,fp8,3,0.008922666932145754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,float16,7,0.009477333476146063
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,float16,7,0.009226666763424873
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,fp8,7,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,fp8,7,0.010496000448862711
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,float16,15,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,float16,15,0.010453333457310995
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,fp8,15,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,fp8,15,0.008938666433095932
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,float16,31,0.009098666409651438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,float16,31,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,fp8,31,0.01009599988659223
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,fp8,31,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,float16,63,0.010533332824707031
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,float16,63,0.010106666634480158
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,fp8,63,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,fp8,63,0.008976000050703684
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,float16,127,0.008965333302815756
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,float16,127,0.009722666814923286
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,fp8,127,0.009002666920423508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,fp8,127,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,float16,255,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,float16,255,0.010058666889866194
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,fp8,255,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,fp8,255,0.00901333304742972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,float16,511,0.0106133334338665
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,float16,511,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,fp8,511,0.00891733355820179
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,float16,2047,0.010634666929642359
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,fp8,511,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,float16,1023,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,fp8,2047,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,float16,1023,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,fp8,1023,0.01219733307758967
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,fp8,1023,0.010645333677530289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,float16,2047,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,fp8,2047,0.009077333534757296
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,float16,4095,0.010575999816258749
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,float16,4095,0.010970667004585266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,fp8,4095,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,fp8,4095,0.011183999478816986
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,float16,8191,0.00892800030608972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,float16,8191,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,fp8,8191,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,fp8,8191,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,float16,16383,0.010656000425418219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,float16,16383,0.017242666333913803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,fp8,16383,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,fp8,16383,0.01709866647919019
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,float16,32767,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,float16,32767,0.01904533306757609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,fp8,32767,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,fp8,32767,0.018933333456516266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,float16,65535,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,float16,1,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,float16,1,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,fp8,1,0.010058666889866194
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,fp8,1,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,float16,65535,0.021359999974568684
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,64,128,1,float16,fp8,65535,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,64,0,1,float16,fp8,65535,0.019989332805077236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,float16,3,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,float16,3,0.008816000074148178
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,fp8,3,0.010026666646202406
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,fp8,3,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,float16,7,0.009125333279371262
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,float16,7,0.010650667051474253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,fp8,7,0.008976000050703684
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,fp8,7,0.010175999874869982
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,float16,15,0.008992000172535578
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,float16,15,0.00895999992887179
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,fp8,15,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,fp8,15,0.00943999985853831
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,float16,31,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,float16,31,0.008997333546479544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,fp8,31,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,fp8,31,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,float16,63,0.008922666932145754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,float16,63,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,fp8,63,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,fp8,63,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,float16,127,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,float16,127,0.009066666786869368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,fp8,127,0.00898133342464765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,fp8,127,0.010890666395425797
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,float16,255,0.008992000172535578
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,float16,255,0.010640000303586325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,fp8,255,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,fp8,255,0.009749333063761393
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,float16,511,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,float16,511,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,fp8,1023,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,fp8,511,0.009413333609700203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,float16,2047,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,fp8,511,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,float16,1023,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,float16,1023,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,fp8,1023,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,fp8,4095,0.010938666760921478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,fp8,4095,0.01341333364446958
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,float16,2047,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,fp8,2047,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,fp8,2047,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,float16,4095,0.011125333607196808
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,float16,4095,0.012858666479587555
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,float16,8191,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,float16,8191,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,fp8,8191,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,fp8,8191,0.015040000279744467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,float16,16383,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,float16,16383,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,fp8,16383,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,fp8,16383,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,float16,32767,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,float16,32767,0.019621333728233974
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,fp8,32767,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,float16,1,0.008650666723648706
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,fp8,32767,0.019178666174411774
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,float16,1,0.008869333192706108
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,float16,65535,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,fp8,1,0.009002666920423508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,fp8,1,0.009072000160813332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,fp8,65535,0.023141334454218548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,64,0,1,float16,float16,65535,0.025231999655564625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,float16,3,0.008890666688481966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,64,128,1,float16,fp8,65535,0.009418666362762451
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,fp8,7,0.00921066664159298
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,fp8,7,0.009029333169261614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,float16,3,0.008693333094318708
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,fp8,3,0.009125333279371262
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,fp8,3,0.009002666920423508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,float16,7,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,float16,7,0.00877333308259646
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,float16,15,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,float16,15,0.008752000207702318
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,fp8,15,0.008879999940594038
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,fp8,15,0.008890666688481966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,float16,31,0.008752000207702318
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,float16,31,0.00860799973209699
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,fp8,31,0.010645333677530289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,fp8,31,0.009066666786869368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,float16,63,0.009018666421373686
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,float16,63,0.008885333314538002
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,fp8,63,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,fp8,63,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,float16,127,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,float16,127,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,fp8,127,0.011157333850860596
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,fp8,127,0.012885333349307379
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,float16,255,0.008890666688481966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,float16,255,0.00903466654320558
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,fp8,255,0.012831999609867731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,fp8,255,0.012784000486135483
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,float16,511,0.012805332740147909
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,float16,511,0.01302933320403099
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,fp8,511,0.012805332740147909
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,fp8,1023,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,float16,2047,0.018837332725524902
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,fp8,511,0.012762666990359625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,float16,1023,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,float16,4095,0.01748266691962878
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,float16,1023,0.02110933264096578
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,fp8,1023,0.015050667027632395
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,float16,2047,0.025402667621771496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,float16,8191,0.019002666076024372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,fp8,2047,0.021295999487241108
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,fp8,2047,0.029258665939172108
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,float16,4095,0.03316800047953924
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,fp8,4095,0.021344001094500225
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,fp8,4095,0.03554133325815201
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,float16,16383,0.07885333398977916
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,float16,8191,0.047968000173568726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,fp8,8191,0.021418665846188862
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,fp8,8191,0.04795200129350027
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,float16,16383,0.017978666971127193
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,fp8,16383,0.021183999876181286
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,fp8,16383,0.07420800129572551
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,float16,32767,0.017194667210181553
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,float16,32767,0.14109333356221518
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,fp8,32767,0.02130666623512904
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,fp8,32767,0.12550399700800577
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,float16,1,0.009018666421373686
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,float16,65535,0.018954666952292126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,64,128,1,float16,fp8,65535,0.021333334346612293
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,float16,65535,0.26359466711680096
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,float16,1,0.010288000106811523
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,fp8,1,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,fp8,1,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,float16,3,0.008954666554927826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,float16,3,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,64,0,1,float16,fp8,65535,0.2278719941775004
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,fp8,3,0.010224000240365664
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,fp8,3,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,float16,7,0.010650667051474253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,float16,7,0.010405333091815313
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,fp8,7,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,fp8,7,0.009621333330869675
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,float16,15,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,float16,15,0.008805333326260248
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,fp8,15,0.010415999839703241
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,fp8,15,0.00973866693675518
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,float16,31,0.0100426667680343
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,float16,31,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,fp8,31,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,fp8,31,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,float16,63,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,fp8,127,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,float16,63,0.009029333169261614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,fp8,63,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,fp8,63,0.010314666976531347
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,float16,127,0.01032533310353756
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,float16,127,0.010656000425418219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,float16,511,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,fp8,127,0.01090666651725769
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,float16,255,0.010645333677530289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,float16,255,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,fp8,255,0.009392000113924345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,fp8,255,0.01062400018175443
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,float16,511,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,fp8,511,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,fp8,511,0.011109333485364914
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,float16,1023,0.010618666807810465
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,float16,1023,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,fp8,1023,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,fp8,1023,0.010992000500361124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,float16,2047,0.010949333508809408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,float16,2047,0.012821332861979803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,fp8,2047,0.01101333275437355
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,fp8,2047,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,float16,4095,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,float16,4095,0.012800000607967377
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,fp8,4095,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,fp8,4095,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,float16,8191,0.010911999891201654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,float16,8191,0.016800000021855038
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,fp8,8191,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,fp8,8191,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,float16,16383,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,float16,16383,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,fp8,16383,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,fp8,16383,0.019066666563351948
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,float16,32767,0.010911999891201654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,float16,32767,0.020842666427294414
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,fp8,32767,0.011114666859308878
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,fp8,32767,0.019600000232458115
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,float16,1,0.009072000160813332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,float16,1,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,float16,65535,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,fp8,65535,0.021488000949223835
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,fp8,1,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,fp8,1,0.010069333637754122
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,64,0,1,float16,float16,65535,0.022789334257443745
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,float16,3,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,64,128,1,float16,fp8,65535,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,float16,3,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,fp8,3,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,fp8,3,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,float16,7,0.009077333534757296
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,float16,7,0.009103999783595404
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,fp8,7,0.010656000425418219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,fp8,7,0.009754666437705358
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,float16,15,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,float16,15,0.00903466654320558
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,fp8,15,0.008933333059151968
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,fp8,31,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,fp8,15,0.00961599995692571
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,float16,31,0.009232000137368837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,float16,31,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,fp8,31,0.009808000177145004
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,float16,63,0.008901333436369896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,float16,63,0.009098666409651438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,fp8,63,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,fp8,63,0.010112000008424124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,float16,127,0.009045333291093508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,float16,127,0.01062400018175443
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,fp8,127,0.009525333220760027
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,fp8,127,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,float16,255,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,float16,255,0.00877333308259646
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,fp8,255,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,fp8,255,0.01009599988659223
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,float16,511,0.00895999992887179
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,float16,511,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,fp8,511,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,fp8,511,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,float16,1023,0.009018666421373686
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,fp8,2047,0.011168000598748526
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,float16,1023,0.010911999891201654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,fp8,1023,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,float16,4095,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,fp8,1023,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,float16,2047,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,float16,2047,0.01108266661564509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,fp8,2047,0.01110400011142095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,float16,4095,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,fp8,4095,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,fp8,4095,0.014783999572197596
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,float16,8191,0.010922666639089584
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,float16,8191,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,fp8,8191,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,fp8,16383,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,fp8,8191,0.015029333531856537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,float16,16383,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,float16,16383,0.01722666621208191
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,fp8,16383,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,float16,32767,0.01090666651725769
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,float16,32767,0.021301334102948506
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,fp8,32767,0.01109333336353302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,fp8,32767,0.01930133377512296
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,float16,1,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,float16,65535,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,float16,1,0.010597333312034607
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,fp8,1,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,float16,65535,0.025589334468046825
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,fp8,1,0.010597333312034607
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,64,128,1,float16,fp8,65535,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,float16,3,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,64,0,1,float16,fp8,65535,0.023376000424226124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,float16,3,0.009082666908701261
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,fp8,3,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,fp8,3,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,float16,7,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,float16,7,0.00973866693675518
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,fp8,7,0.008997333546479544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,fp8,7,0.010634666929642359
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,float16,15,0.009008000294367472
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,float16,15,0.009103999783595404
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,fp8,15,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,fp8,15,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,float16,63,0.009029333169261614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,float16,31,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,float16,31,0.008853333070874214
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,fp8,31,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,fp8,31,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,float16,63,0.009130666653315226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,fp8,63,0.01089599976936976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,fp8,63,0.009008000294367472
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,float16,127,0.00973866693675518
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,float16,127,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,fp8,127,0.00891733355820179
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,fp8,127,0.009290666629870733
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,float16,255,0.009658666948477427
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,float16,255,0.010890666395425797
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,fp8,255,0.009829333052039146
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,fp8,255,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,float16,511,0.009002666920423508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,float16,511,0.010351999973257383
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,fp8,511,0.010543999572594961
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,fp8,511,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,float16,1023,0.010992000500361124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,float16,1023,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,fp8,2047,0.011146667102972666
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,fp8,1023,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,fp8,1023,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,float16,2047,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,float16,2047,0.012815999488035837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,fp8,2047,0.01313599944114685
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,float16,4095,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,float16,4095,0.014858666807413101
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,fp8,8191,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,fp8,4095,0.011098666737476984
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,fp8,4095,0.013914667069911957
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,float16,8191,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,float16,8191,0.016480000068744022
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,fp8,16383,0.011077333241701126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,fp8,8191,0.015290666371583939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,float16,16383,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,float16,16383,0.019248000035683315
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,fp8,16383,0.01728533332546552
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,float16,32767,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,fp8,32767,0.011055999745925268
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,float16,32767,0.025253333151340485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,float16,1,0.027087998886903126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,fp8,32767,0.023130667706330616
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,float16,1,0.027424000203609467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,fp8,65535,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,64,128,1,float16,float16,65535,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,fp8,1,0.023408000667889912
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,fp8,1,0.02325333406527837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,fp8,3,0.023413332800070446
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,float16,65535,0.03907199949026108
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,64,0,1,float16,fp8,65535,0.030207999050617218
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,float16,3,0.02733866622050603
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,float16,3,0.027434666951497395
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,fp8,3,0.02333866556485494
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,float16,7,0.027477333943049114
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,float16,7,0.02752533306678136
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,fp8,7,0.023306667804718018
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,fp8,7,0.024432001014550526
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,float16,15,0.027509334186712902
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,float16,15,0.029120000700155895
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,fp8,31,0.029504001140594482
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,float16,63,0.03326933334271113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,fp8,15,0.024336000283559162
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,fp8,15,0.023989332218964893
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,float16,31,0.033226666351159416
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,float16,31,0.033488000432650246
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,float16,127,0.033557333052158356
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,fp8,31,0.02956799914439519
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,float16,63,0.03324266771475474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,fp8,63,0.029520000020662945
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,fp8,63,0.029498666524887085
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,float16,127,0.03342399994532267
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,fp8,127,0.029557332396507263
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,fp8,127,0.02961066613594691
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,float16,255,0.033285332222779594
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,float16,255,0.0384853333234787
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,fp8,255,0.02958400050799052
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,float16,1023,0.03350399931271871
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,fp8,255,0.03559466699759165
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,float16,511,0.03356266766786575
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,float16,511,0.05593599875768026
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,fp8,511,0.029440000653266907
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,fp8,511,0.05203199883302053
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,float16,1023,0.09062400460243225
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,fp8,1023,0.02937600016593933
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,fp8,2047,0.15479999780654907
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,fp8,1023,0.0867146650950114
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,float16,2047,0.033285332222779594
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,float16,2047,0.15848533312479654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,fp8,2047,0.02922666569550832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,float16,4095,0.03322133421897888
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,fp8,4095,0.029487999776999157
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,float16,4095,0.297599991162618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,fp8,4095,0.2918826738993327
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,float16,8191,0.5854453245798746
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,float16,8191,0.03403199960788091
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,fp8,1,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,float16,3,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,64,128,1,float16,fp8,8191,0.02959466725587845
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,fp8,3,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,float16,1,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,float16,1,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,fp8,1,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,64,0,1,float16,fp8,8191,0.5703680117925009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,float16,3,0.011077333241701126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,fp8,3,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,float16,15,0.011146667102972666
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,float16,7,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,float16,7,0.010634666929642359
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,fp8,7,0.01097600037852923
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,float16,31,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,fp8,7,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,float16,15,0.011125333607196808
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,fp8,31,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,fp8,15,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,fp8,15,0.011173332730929056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,float16,31,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,fp8,31,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,float16,127,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,float16,63,0.010944000134865442
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,float16,63,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,fp8,63,0.011125333607196808
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,fp8,63,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,float16,127,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,fp8,255,0.010890666395425797
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,fp8,127,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,float16,511,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,fp8,127,0.011018666128317514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,float16,255,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,fp8,511,0.01109333336353302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,float16,255,0.011071999867757162
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,float16,1023,0.013061333447694778
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,fp8,255,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,float16,511,0.01121066634853681
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,fp8,511,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,float16,1023,0.011205332974592844
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,fp8,1023,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,fp8,1023,0.013365333278973898
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,float16,2047,0.012917333592971167
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,float16,2047,0.01903466631968816
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,fp8,2047,0.012944000462690989
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,fp8,2047,0.019002666076024372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,float16,4095,0.012928000340859095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,float16,8191,0.013882666826248169
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,float16,4095,0.027141332626342773
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,fp8,4095,0.012986666212479273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,fp8,4095,0.02405333270629247
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,float16,8191,0.04357333481311798
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,64,128,1,float16,fp8,8191,0.013162666310866674
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,float16,3,0.010949333508809408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,float16,1,0.011850666254758835
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,float16,1,0.011823999385039011
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,64,0,1,float16,fp8,8191,0.03606399893760681
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,fp8,1,0.012885333349307379
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,fp8,1,0.012586666891972223
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,fp8,7,0.011061333119869232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,float16,3,0.011727999895811081
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,fp8,3,0.011168000598748526
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,fp8,3,0.011205332974592844
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,float16,7,0.011994666109482447
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,fp8,15,0.012730666746695837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,float16,7,0.012154666086037954
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,float16,31,0.011637333780527115
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,fp8,7,0.012773333738247553
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,float16,15,0.012757333616415659
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,float16,15,0.01090666651725769
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,fp8,15,0.012389333297808966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,float16,31,0.012858666479587555
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,fp8,31,0.012784000486135483
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,float16,127,0.011407999942700068
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,fp8,31,0.011253333340088526
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,float16,63,0.011695999652147293
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,float16,63,0.011018666128317514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,fp8,63,0.011333333949247995
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,float16,255,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,fp8,63,0.011098666737476984
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,float16,127,0.012719999998807907
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,fp8,127,0.012757333616415659
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,fp8,127,0.012149333953857422
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,float16,255,0.011141333729028702
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,fp8,255,0.011727999895811081
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,float16,1023,0.01268799975514412
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,fp8,255,0.011071999867757162
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,float16,511,0.011770666887362799
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,float16,511,0.013242666920026144
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,fp8,511,0.012858666479587555
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,fp8,511,0.013141332815090815
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,float16,1023,0.016314666718244553
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,fp8,1023,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,fp8,1023,0.01551466683546702
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,float16,2047,0.014837333311637243
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,float16,2047,0.02500266581773758
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,fp8,2047,0.015674666812022526
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,fp8,2047,0.02306666721900304
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,float16,4095,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,float16,4095,0.039642666776975
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,fp8,4095,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,float16,1,0.01393066719174385
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,fp8,4095,0.03319466610749563
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,float16,8191,0.014773332824309668
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,float16,8191,0.06280000011126201
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,float16,3,0.01482133318980535
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,64,128,1,float16,fp8,8191,0.015477333217859268
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,float16,1,0.013167999684810638
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,64,0,1,float16,fp8,8191,0.05579733351866404
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,fp8,1,0.014858666807413101
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,fp8,1,0.013786666095256805
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,float16,3,0.014618666221698126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,fp8,3,0.013125333935022354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,fp8,7,0.013221333424250284
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,fp8,3,0.0144213338692983
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,float16,7,0.01312000056107839
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,fp8,15,0.014922666052977243
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,float16,7,0.014773332824309668
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,fp8,7,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,float16,15,0.014789332946141561
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,fp8,31,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,float16,15,0.013157332936922709
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,float16,63,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,fp8,15,0.015034666905800501
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,float16,31,0.013157332936922709
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,fp8,63,0.01471466695268949
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,float16,31,0.014815999815861383
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,fp8,31,0.014773332824309668
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,float16,63,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,fp8,63,0.013093333691358566
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,float16,255,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,float16,127,0.012970666090647379
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,float16,127,0.01312000056107839
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,fp8,127,0.014736000448465347
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,fp8,127,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,fp8,511,0.014597332725922266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,float16,255,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,fp8,255,0.013002666334311167
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,fp8,255,0.012975999464591345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,float16,511,0.014245333770910898
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,float16,511,0.017231999586025875
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,fp8,511,0.01684800038735072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,float16,1023,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,float16,1023,0.02334933231274287
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,fp8,1023,0.014639999717473984
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,fp8,1023,0.021365332106749218
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,float16,2047,0.017093333105246227
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,float16,2047,0.039919999738534294
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,fp8,2047,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,fp8,2047,0.033370666205883026
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,float16,4095,0.017114666601022083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,float16,4095,0.06304533282915752
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,fp8,4095,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,fp8,4095,0.05573866764704386
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,float16,8191,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,float16,8191,0.10719466209411621
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,64,128,1,float16,fp8,8191,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,float16,1,0.00874133345981439
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,float16,1,0.00867733359336853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,64,0,1,float16,fp8,8191,0.09482666850090027
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,fp8,1,0.00901333304742972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,fp8,1,0.009578666960199675
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,float16,3,0.00867733359336853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,float16,3,0.008693333094318708
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,fp8,3,0.00898133342464765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,fp8,3,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,float16,7,0.008746666833758354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,float16,7,0.009029333169261614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,fp8,7,0.008986666798591614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,fp8,7,0.009354666496316591
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,float16,15,0.008949333180983862
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,float16,15,0.008890666688481966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,fp8,15,0.009066666786869368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,fp8,15,0.009125333279371262
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,float16,31,0.009072000160813332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,float16,31,0.008736000085870424
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,fp8,31,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,fp8,31,0.010608000059922537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,float16,63,0.008885333314538002
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,float16,63,0.009098666409651438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,fp8,63,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,fp8,63,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,float16,127,0.009072000160813332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,float16,127,0.00884799969693025
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,fp8,127,0.012896000097195307
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,fp8,127,0.012986666212479273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,float16,255,0.00895999992887179
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,float16,255,0.008949333180983862
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,fp8,255,0.012879999975363413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,fp8,255,0.012917333592971167
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,float16,511,0.019226666539907455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,fp8,1023,0.023413332800070446
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,float16,511,0.02085866779088974
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,fp8,511,0.012698666503032049
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,fp8,511,0.012693333129088083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,fp8,2047,0.023024000227451324
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,float16,1023,0.019226666539907455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,float16,4095,0.01929066702723503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,float16,1023,0.02345066765944163
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,fp8,1023,0.02743999908367793
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,float16,2047,0.01903466631968816
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,float16,2047,0.029557332396507263
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,fp8,2047,0.0315733328461647
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,float16,4095,0.04144533226887385
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,fp8,4095,0.023200000325838726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,fp8,4095,0.041696002086003624
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,float16,16383,0.019141333798567455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,float16,8191,0.019173332800467808
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,float16,8191,0.06402666866779327
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,fp8,8191,0.023130667706330616
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,fp8,8191,0.06234133243560791
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,float16,16383,0.10993599891662598
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,fp8,16383,0.023290666441122692
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,fp8,16383,0.09921600421269734
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,float16,32767,0.018976000448067982
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,fp8,32767,0.023418667415777843
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,float16,32767,0.2032853364944458
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,fp8,32767,0.17882132530212402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,float16,1,0.009018666421373686
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,float16,65535,0.018810667097568512
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,float16,1,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,fp8,1,0.011503999431928
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,fp8,1,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,float16,3,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,64,128,1,float16,fp8,65535,0.023189333577950794
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,float16,3,0.009872000043590864
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,float16,65535,0.3897973299026489
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,fp8,3,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,fp8,3,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,fp8,7,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,64,0,1,float16,fp8,65535,0.33030933141708374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,float16,7,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,float16,7,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,fp8,7,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,float16,15,0.010885333021481832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,float16,15,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,fp8,31,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,fp8,15,0.009999999776482582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,fp8,15,0.01091733326514562
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,float16,31,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,float16,31,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,float16,63,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,fp8,31,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,float16,63,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,fp8,63,0.010058666889866194
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,fp8,63,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,float16,127,0.010597333312034607
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,float16,127,0.010640000303586325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,fp8,127,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,fp8,127,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,float16,255,0.010911999891201654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,float16,255,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,fp8,255,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,fp8,255,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,float16,511,0.010650667051474253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,float16,511,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,fp8,511,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,fp8,511,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,float16,1023,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,float16,1023,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,fp8,1023,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,fp8,1023,0.011130666981140772
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,float16,2047,0.012800000607967377
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,float16,2047,0.012847999731699625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,fp8,2047,0.011098666737476984
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,fp8,2047,0.013189333180586496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,float16,4095,0.012800000607967377
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,float16,4095,0.014970666418472925
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,fp8,4095,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,fp8,4095,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,float16,8191,0.012719999998807907
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,float16,8191,0.01714133347074191
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,fp8,8191,0.011301333705584208
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,fp8,8191,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,float16,16383,0.012938667088747025
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,float16,16383,0.019141333798567455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,fp8,16383,0.010944000134865442
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,fp8,16383,0.018895999838908512
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,float16,32767,0.012709333250919977
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,float16,32767,0.02387733260790507
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,fp8,32767,0.011381333072980246
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,fp8,32767,0.021402666966120403
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,float16,1,0.009418666362762451
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,float16,1,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,fp8,1,0.009119999905427298
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,float16,65535,0.011018666128317514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,float16,3,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,64,128,1,float16,fp8,65535,0.010901333143313726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,fp8,3,0.009066666786869368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,fp8,1,0.008869333192706108
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,fp8,65535,0.02735999971628189
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,float16,3,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,64,0,1,float16,float16,65535,0.029264000554879505
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,fp8,3,0.008976000050703684
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,float16,7,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,float16,7,0.008992000172535578
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,fp8,7,0.009242666885256767
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,fp8,7,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,float16,15,0.009039999917149544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,float16,15,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,fp8,15,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,fp8,15,0.009018666421373686
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,float16,31,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,float16,31,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,fp8,31,0.008976000050703684
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,fp8,31,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,float16,63,0.009712000067035357
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,float16,63,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,fp8,63,0.01090666651725769
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,fp8,63,0.009114666531483332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,float16,127,0.00978133330742518
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,float16,127,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,fp8,127,0.008885333314538002
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,fp8,127,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,float16,511,0.010938666760921478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,float16,511,0.01022933361430963
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,float16,255,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,float16,255,0.009098666409651438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,fp8,255,0.009818666925032934
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,float16,1023,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,fp8,255,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,fp8,511,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,float16,2047,0.011567999919255575
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,fp8,511,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,float16,1023,0.009066666786869368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,fp8,1023,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,fp8,1023,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,float16,2047,0.015002666662136713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,fp8,2047,0.013088000317414602
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,fp8,2047,0.01482133318980535
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,float16,8191,0.011125333607196808
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,float16,4095,0.011157333850860596
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,float16,4095,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,fp8,4095,0.012800000607967377
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,fp8,4095,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,float16,16383,0.010992000500361124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,float16,8191,0.017386666188637417
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,fp8,8191,0.012773333738247553
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,fp8,8191,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,float16,16383,0.021082667013009388
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,fp8,16383,0.012805332740147909
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,fp8,16383,0.01905599981546402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,float16,32767,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,float16,32767,0.02534399926662445
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,fp8,32767,0.013258667041858038
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,fp8,32767,0.024117333193620045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,float16,1,0.009189333145817121
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,float16,1,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,fp8,1,0.011125333607196808
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,float16,65535,0.04035733391841253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,fp8,65535,0.012847999731699625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,64,0,1,float16,fp8,65535,0.031957333286603294
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,64,128,1,float16,float16,65535,0.012725333372751871
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,fp8,1,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,float16,3,0.010634666929642359
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,float16,3,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,fp8,3,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,fp8,3,0.010911999891201654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,float16,7,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,float16,7,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,fp8,7,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,fp8,7,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,float16,31,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,fp8,31,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,float16,15,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,float16,15,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,fp8,15,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,fp8,15,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,float16,31,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,fp8,31,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,float16,63,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,float16,63,0.008943999807039896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,fp8,63,0.010933333386977514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,fp8,63,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,float16,127,0.008933333059151968
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,float16,127,0.009141333401203156
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,fp8,127,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,fp8,127,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,float16,255,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,float16,255,0.00871999996403853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,fp8,255,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,fp8,255,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,float16,511,0.009743999689817429
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,float16,511,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,fp8,511,0.010901333143313726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,fp8,511,0.011087999989589056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,float16,1023,0.010586666564146677
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,fp8,2047,0.012794667234023413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,float16,1023,0.010608000059922537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,fp8,1023,0.010890666395425797
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,float16,4095,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,fp8,1023,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,float16,2047,0.011461333682139715
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,float16,2047,0.012954667210578918
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,fp8,2047,0.014453332871198654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,float16,4095,0.012821332861979803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,fp8,4095,0.012698666503032049
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,fp8,4095,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,float16,8191,0.012842666357755661
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,float16,16383,0.024186665813128155
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,float16,8191,0.018538666268189747
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,fp8,8191,0.01210133358836174
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,fp8,8191,0.017024000485738117
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,float16,16383,0.013050666699806849
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,fp8,16383,0.012805332740147909
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,fp8,16383,0.023007998863856
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,fp8,32767,0.03146133323510488
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,float16,32767,0.03963200002908707
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,float16,65535,0.012752000242471695
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,float16,32767,0.012810666114091873
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,fp8,32767,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,float16,1,0.0498933345079422
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,float16,65535,0.06224533418814341
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,64,128,1,float16,fp8,65535,0.012730666746695837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,float16,1,0.04804266492525736
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,fp8,1,0.039887999494870506
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,64,0,1,float16,fp8,65535,0.05383466680844625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,fp8,1,0.03974399964014689
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,float16,3,0.049653331438700356
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,float16,3,0.0480373352766037
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,fp8,3,0.03967999915281931
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,fp8,3,0.039808000127474465
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,float16,7,0.0499839981396993
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,float16,7,0.04971200227737427
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,fp8,15,0.04177066683769226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,fp8,7,0.041402667760849
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,fp8,7,0.04155733436346054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,float16,15,0.051776001850763954
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,float16,15,0.05170666674772898
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,fp8,15,0.04367466767628988
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,float16,31,0.05992533266544342
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,float16,31,0.0598880002895991
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,fp8,31,0.05384533107280731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,fp8,63,0.054058666030565895
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,fp8,31,0.05386666456858317
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,float16,63,0.06010133524735769
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,float16,63,0.06025066475073496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,fp8,63,0.05385600030422211
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,float16,127,0.06028800209363302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,float16,127,0.06002133091290792
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,fp8,127,0.053914666175842285
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,fp8,127,0.054133335749308266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,float16,255,0.060234665870666504
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,float16,255,0.07050666709740956
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,fp8,255,0.05402133365472158
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,fp8,511,0.09724266330401103
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,fp8,255,0.06410133341948192
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,float16,511,0.060309335589408875
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,float16,1023,0.17141334215799967
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,float16,511,0.10511466860771179
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,fp8,511,0.054058666030565895
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,float16,1023,0.06118933359781901
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,fp8,1023,0.053914666175842285
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,fp8,1023,0.16476266582806906
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,float16,2047,0.06029866635799408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,float16,4095,0.060090666015942894
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,fp8,2047,0.05363733569780985
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,64,128,1,float16,fp8,4095,0.053914666175842285
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,float16,1,0.012896000097195307
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,float16,2047,0.3081706762313843
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,fp8,2047,0.29993067185084027
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,float16,1,0.012928000340859095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,float16,4095,0.5902613401412964
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,fp8,1,0.013072000195582708
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,fp8,1,0.012863999853531519
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,64,0,1,float16,fp8,4095,0.5723839998245239
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,float16,3,0.012757333616415659
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,float16,3,0.01292266696691513
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,fp8,3,0.01303999995191892
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,fp8,3,0.01268799975514412
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,float16,15,0.012810666114091873
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,fp8,15,0.012896000097195307
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,float16,7,0.012757333616415659
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,float16,7,0.012698666503032049
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,float16,31,0.013013333082199097
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,fp8,7,0.01314666618903478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,fp8,7,0.012890666723251343
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,float16,15,0.012719999998807907
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,fp8,15,0.013104000439246496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,float16,31,0.01302933320403099
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,fp8,31,0.012837332983811697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,fp8,31,0.01268799975514412
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,float16,63,0.012869333227475485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,float16,63,0.012853333105643591
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,fp8,63,0.012858666479587555
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,fp8,63,0.012847999731699625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,float16,127,0.012794667234023413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,float16,127,0.012853333105643591
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,fp8,127,0.012741333494583765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,fp8,127,0.012901333471139273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,float16,255,0.012863999853531519
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,float16,255,0.012800000607967377
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,fp8,255,0.013327999661366144
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,fp8,255,0.012746666868527731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,float16,511,0.012725333372751871
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,float16,511,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,fp8,511,0.012949333836634954
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,fp8,511,0.014186666657527288
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,float16,1023,0.012949333836634954
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,float16,1023,0.0173333336909612
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,fp8,2047,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,fp8,1023,0.012981332838535309
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,fp8,1023,0.017125333348910015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,float16,2047,0.015050667027632395
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,float16,4095,0.04385066529115041
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,float16,2047,0.0271519993742307
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,fp8,2047,0.025301332275072735
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,float16,4095,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,64,128,1,float16,fp8,4095,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,float16,1,0.014757333944241205
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,64,0,1,float16,fp8,4095,0.037418665985266365
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,float16,1,0.01340266689658165
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,float16,3,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,fp8,1,0.013770667215188345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,fp8,1,0.013210666676362356
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,float16,3,0.014783999572197596
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,fp8,7,0.013823999712864557
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,fp8,3,0.013114667187134424
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,fp8,3,0.013210666676362356
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,float16,7,0.01349866638580958
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,float16,7,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,fp8,7,0.013162666310866674
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,float16,15,0.013493333011865616
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,float16,31,0.014805333067973455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,float16,15,0.01381333296497663
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,fp8,15,0.013178666432698568
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,fp8,15,0.01310933381319046
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,float16,31,0.015018666783968607
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,fp8,63,0.013365333278973898
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,fp8,31,0.013013333082199097
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,fp8,31,0.013872000078360239
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,float16,63,0.014789332946141561
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,float16,63,0.014218666901191076
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,fp8,63,0.013258667041858038
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,float16,127,0.014815999815861383
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,float16,127,0.013765333841244379
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,fp8,127,0.01301866645614306
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,fp8,127,0.013738666971524557
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,float16,255,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,float16,255,0.01482133318980535
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,fp8,255,0.013807999591032663
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,fp8,255,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,float16,511,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,fp8,511,0.014805333067973455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,float16,511,0.017194667210181553
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,fp8,511,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,float16,1023,0.014837333311637243
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,float16,2047,0.041434665520985924
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,float16,1023,0.023103999594847362
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,fp8,2047,0.033600000043710075
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,fp8,1023,0.013957332819700241
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,fp8,1023,0.021557333568731945
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,float16,4095,0.06391466657320659
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,float16,2047,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,64,0,1,float16,fp8,4095,0.0572213331858317
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,fp8,2047,0.016735999534527462
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,fp8,1,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,float16,4095,0.01732800031701724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,64,128,1,float16,fp8,4095,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,float16,1,0.017738666385412216
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,float16,1,0.018853332847356796
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,fp8,1,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,float16,3,0.01897066707412402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,fp8,7,0.01714666684468587
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,float16,3,0.017312000195185345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,fp8,3,0.01743999992807706
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,fp8,3,0.017290666699409485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,float16,7,0.018944000204404194
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,float16,7,0.018837332725524902
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,fp8,7,0.017162666966517765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,float16,15,0.0189280000825723
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,fp8,31,0.017717332889636356
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,float16,15,0.018906666586796444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,fp8,31,0.01714133347074191
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,fp8,15,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,fp8,15,0.0174346665541331
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,fp8,63,0.017130666722853977
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,float16,31,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,float16,31,0.01926400015751521
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,float16,63,0.017690667261679966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,fp8,127,0.01710933322707812
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,float16,63,0.019050666441520054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,fp8,63,0.01764800027012825
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,float16,127,0.017456000049908955
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,float16,127,0.019018666197856266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,fp8,127,0.017071999609470367
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,float16,255,0.018965333700180054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,float16,255,0.017690667261679966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,fp8,255,0.01894933357834816
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,fp8,255,0.017375999440749485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,float16,511,0.017903999735911686
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,float16,511,0.024133334557215374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,fp8,1023,0.03146133323510488
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,fp8,511,0.01897066707412402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,fp8,511,0.02292799949645996
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,float16,2047,0.0599839985370636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,float16,1023,0.019018666197856266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,fp8,2047,0.053690666953722634
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,float16,1023,0.036720000207424164
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,fp8,1023,0.018954666952292126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,float16,2047,0.021269333859284718
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,fp8,2047,0.022634667654832203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,float16,4095,0.02089066555102666
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,float16,4095,0.09882666667302449
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,64,128,1,float16,fp8,4095,0.021130666136741638
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,64,0,1,float16,fp8,4095,0.08641067147254944
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,64,128,1,float16,float16,1,0.09074667096138
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,64,0,1,float16,float16,1,0.09065066774686177
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,64,128,1,float16,fp8,1,0.07421866556008656
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,64,0,1,float16,fp8,1,0.07574933270613353
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,64,128,1,float16,float16,3,0.09059733152389526
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,64,128,1,float16,float16,7,0.09316800038019817
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,64,0,1,float16,float16,3,0.09060800075531006
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,64,128,1,float16,fp8,3,0.07423466444015503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,64,0,1,float16,fp8,3,0.07438399891058604
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,64,0,1,float16,float16,7,0.09311466415723164
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,64,128,1,float16,fp8,7,0.07656533519426982
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,64,0,1,float16,fp8,7,0.0763626645008723
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,64,128,1,float16,float16,15,0.09487467010815938
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,64,0,1,float16,float16,15,0.09506133198738098
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,64,0,1,float16,float16,31,0.11338133613268535
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,64,0,1,float16,fp8,31,0.10091200470924377
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,64,128,1,float16,fp8,15,0.07866133252779643
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,64,0,1,float16,fp8,15,0.07872533301512401
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,64,128,1,float16,float16,31,0.11417067050933838
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,64,128,1,float16,fp8,63,0.1011253297328949
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,64,128,1,float16,fp8,31,0.09956266482671101
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,64,128,1,float16,float16,63,0.1135093371073405
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,64,0,1,float16,float16,63,0.11365866661071777
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,64,0,1,float16,fp8,63,0.10085333387056987
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,64,128,1,float16,float16,127,0.11513599753379822
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,64,0,1,float16,float16,127,0.11493333180745442
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,64,128,1,float16,fp8,127,0.10109866658846538
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,64,0,1,float16,fp8,127,0.1009279986222585
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,64,128,1,float16,float16,255,0.11529067158699036
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,64,128,1,float16,float16,511,0.11528000235557556
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,64,0,1,float16,float16,255,0.1344106694062551
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,64,128,1,float16,fp8,255,0.10098666946093242
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,64,0,1,float16,fp8,255,0.12151466806729634
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,64,0,1,float16,fp8,511,0.1882986625035604
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,64,0,1,float16,float16,511,0.20156800746917725
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,64,128,1,float16,fp8,511,0.10115200281143188
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,64,128,1,float16,fp8,1023,0.1011946698029836
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,64,128,1,float16,float16,1023,0.11553600430488586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,64,0,1,float16,float16,1023,0.3347466786702474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,64,128,1,float16,float16,1,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,64,0,1,float16,fp8,1023,0.3221333424250285
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,64,0,1,float16,float16,3,0.0161013330022494
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,64,0,1,float16,float16,1,0.01568000018596649
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,64,128,1,float16,fp8,1,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,64,0,1,float16,fp8,1,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,64,128,1,float16,float16,3,0.014858666807413101
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,64,128,1,float16,fp8,3,0.01498666654030482
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,64,0,1,float16,fp8,7,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,64,0,1,float16,fp8,3,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,64,128,1,float16,float16,7,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,64,0,1,float16,float16,7,0.015487999965747198
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,64,128,1,float16,fp8,7,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,64,128,1,float16,float16,15,0.015722667177518208
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,64,0,1,float16,float16,15,0.015040000279744467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,64,128,1,float16,fp8,15,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,64,0,1,float16,fp8,15,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,64,128,1,float16,float16,31,0.015578666081031164
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,64,0,1,float16,float16,31,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,64,128,1,float16,fp8,31,0.014975999792416891
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,64,0,1,float16,fp8,31,0.014997333288192749
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,64,128,1,float16,float16,63,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,64,0,1,float16,float16,63,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,64,128,1,float16,fp8,63,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,64,0,1,float16,fp8,63,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,64,128,1,float16,float16,127,0.015498666713635126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,64,0,1,float16,float16,127,0.015909332782030106
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,64,128,1,float16,fp8,127,0.014991999914248785
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,64,0,1,float16,fp8,127,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,64,128,1,float16,float16,255,0.016442666451136272
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,64,0,1,float16,float16,255,0.015493333339691162
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,64,128,1,float16,fp8,255,0.015893333901961643
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,64,0,1,float16,fp8,255,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,64,128,1,float16,float16,511,0.017008000363906223
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,64,0,1,float16,float16,511,0.020373333245515823
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,64,128,1,float16,fp8,511,0.015050667027632395
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,64,0,1,float16,fp8,511,0.019205333044131596
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,64,128,1,float16,float16,1023,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,64,0,1,float16,float16,1023,0.026911998788515728
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,64,128,1,float16,fp8,1,0.01728533332546552
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,64,128,1,float16,fp8,1023,0.016255999604860943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,64,0,1,float16,fp8,1023,0.025077333052953083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,64,0,1,float16,float16,3,0.01922133316596349
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,64,128,1,float16,float16,1,0.019199999670187633
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,64,0,1,float16,float16,1,0.018976000448067982
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,64,128,1,float16,float16,7,0.018954666952292126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,64,0,1,float16,float16,7,0.018901333212852478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,64,0,1,float16,fp8,1,0.017162666966517765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,64,128,1,float16,float16,3,0.01897066707412402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,64,128,1,float16,float16,15,0.01921066641807556
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,64,128,1,float16,fp8,3,0.019152000546455383
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,64,0,1,float16,fp8,3,0.019120000302791595
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,64,128,1,float16,fp8,7,0.018911999960740406
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,64,0,1,float16,fp8,7,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,64,0,1,float16,float16,15,0.018842666099468868
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,64,128,1,float16,fp8,15,0.01714666684468587
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,64,0,1,float16,fp8,15,0.018965333700180054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,64,128,1,float16,float16,31,0.01923199991385142
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,64,0,1,float16,float16,31,0.018842666099468868
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,64,128,1,float16,fp8,63,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,64,128,1,float16,fp8,31,0.018965333700180054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,64,128,1,float16,float16,127,0.019061333189407986
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,64,0,1,float16,fp8,31,0.020154666155576706
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,64,128,1,float16,float16,63,0.018954666952292126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,64,0,1,float16,float16,63,0.01884799947341283
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,64,0,1,float16,fp8,127,0.01897066707412402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,64,0,1,float16,fp8,63,0.018842666099468868
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,64,0,1,float16,float16,127,0.01899733394384384
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,64,128,1,float16,fp8,127,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,64,128,1,float16,float16,255,0.01894933357834816
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,64,0,1,float16,float16,255,0.018965333700180054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,64,128,1,float16,fp8,255,0.018805333723624546
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,64,0,1,float16,fp8,255,0.018842666099468868
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,64,128,1,float16,float16,511,0.01894933357834816
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,64,0,1,float16,float16,511,0.024586667617162068
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,64,128,1,float16,fp8,511,0.019013332823912304
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,64,0,1,float16,fp8,511,0.023024000227451324
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,64,128,1,float16,float16,1023,0.018911999960740406
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,64,0,1,float16,float16,1023,0.035674666364987694
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,64,128,1,float16,fp8,1023,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,64,128,1,float16,fp8,1,0.025429333249727886
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,64,0,1,float16,fp8,1023,0.03125333289305369
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,64,128,1,float16,float16,1,0.02712533374627431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,64,0,1,float16,float16,1,0.02738133321205775
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,64,0,1,float16,fp8,1,0.025461333493391674
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,64,128,1,float16,float16,3,0.027141332626342773
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,64,0,1,float16,float16,3,0.027215999861558277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,64,128,1,float16,fp8,3,0.025536000728607178
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,64,0,1,float16,fp8,3,0.025349333882331848
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,64,128,1,float16,float16,7,0.02718399961789449
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,64,0,1,float16,float16,7,0.0271519993742307
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,64,128,1,float16,fp8,7,0.02518400053183238
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,64,0,1,float16,fp8,7,0.025237334271272022
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,64,128,1,float16,float16,15,0.027269333600997925
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,64,0,1,float16,float16,15,0.027306665976842243
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,64,128,1,float16,fp8,15,0.0252960001428922
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,64,0,1,float16,fp8,15,0.025221332907676697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,64,128,1,float16,float16,31,0.02714666724205017
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,64,0,1,float16,float16,31,0.027056001126766205
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,64,128,1,float16,fp8,31,0.025434667865435284
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,64,0,1,float16,fp8,31,0.025018667181332905
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,64,128,1,float16,float16,63,0.027050666511058807
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,64,0,1,float16,float16,63,0.027317332724730175
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,64,128,1,float16,fp8,63,0.02532266577084859
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,64,0,1,float16,fp8,63,0.025370667378107708
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,64,0,1,float16,float16,127,0.027130665878454845
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,64,128,1,float16,float16,127,0.027045334378878277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,64,128,1,float16,fp8,255,0.027104000250498455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,64,0,1,float16,fp8,255,0.025434667865435284
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,64,128,1,float16,fp8,127,0.025418666501839954
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,64,0,1,float16,fp8,127,0.025087999800841015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,64,128,1,float16,float16,255,0.02734400083621343
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,64,0,1,float16,float16,255,0.027263998985290527
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,64,128,1,float16,float16,511,0.02718399961789449
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,64,0,1,float16,float16,511,0.04045866678158442
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,64,128,1,float16,fp8,511,0.027077332139015198
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,64,0,1,float16,fp8,511,0.03543466577927271
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,64,128,1,float16,float16,1023,0.02714666724205017
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,64,128,1,float16,float16,1,0.17497066656748453
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,64,0,1,float16,float16,1023,0.06021333237489065
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,64,128,1,float16,fp8,1023,0.026015999416510265
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,64,0,1,float16,fp8,1023,0.05374933282534281
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,64,0,1,float16,float16,1,0.17462400595347086
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,64,128,1,float16,float16,3,0.1747573415438334
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,64,128,1,float16,fp8,1,0.1418613294760386
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,64,0,1,float16,fp8,1,0.14150933424631754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,64,0,1,float16,fp8,3,0.14153599739074707
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,64,0,1,float16,float16,3,0.17476266622543335
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,64,128,1,float16,fp8,3,0.1415786643822988
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,64,128,1,float16,float16,7,0.18034666776657104
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,64,0,1,float16,fp8,7,0.1462399959564209
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,64,0,1,float16,float16,7,0.18020800749460855
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,64,128,1,float16,fp8,7,0.14618666966756186
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,64,0,1,float16,fp8,15,0.1521813372770945
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,64,128,1,float16,float16,15,0.18287465969721475
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,64,0,1,float16,float16,15,0.18318400780359903
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,64,128,1,float16,fp8,15,0.15246933698654175
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,64,128,1,float16,float16,31,0.21991467475891113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,64,0,1,float16,float16,31,0.22010133663813272
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,64,128,1,float16,fp8,31,0.19337066014607748
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,64,0,1,float16,fp8,31,0.19339199860890707
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,64,128,1,float16,float16,63,0.22175999482472739
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,64,0,1,float16,float16,63,0.2218773365020752
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,64,128,1,float16,fp8,63,0.19515732924143472
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,64,0,1,float16,fp8,63,0.19506667057673135
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,64,128,1,float16,float16,127,0.22374399503072104
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,64,0,1,float16,fp8,127,0.19527999560038248
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,64,0,1,float16,float16,127,0.2237280011177063
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,64,128,1,float16,fp8,127,0.19523199399312338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,64,128,1,float16,float16,255,0.22377600272496542
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,64,0,1,float16,float16,255,0.2627519965171814
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,64,128,1,float16,fp8,255,0.19534399112065634
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,64,0,1,float16,fp8,255,0.2344640096028646
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,64,128,1,float16,float16,511,0.22374399503072104
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,64,128,1,float16,fp8,511,0.19541333119074503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,64,0,1,float16,float16,511,0.3959893385569255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,64,128,1,float16,float16,1,0.02107733239730199
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,64,0,1,float16,float16,1,0.021333334346612293
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,64,0,1,float16,fp8,511,0.3675413529078166
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,64,128,1,float16,fp8,1,0.020960000654061634
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,64,0,1,float16,fp8,1,0.0210506667693456
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,64,128,1,float16,float16,3,0.021295999487241108
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,64,0,1,float16,float16,3,0.021338666478792827
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,64,128,1,float16,fp8,3,0.02125866711139679
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,64,0,1,float16,fp8,3,0.021157334248224895
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,64,128,1,float16,float16,7,0.02128000060717265
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,64,0,1,float16,float16,7,0.021007999777793884
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,64,128,1,float16,fp8,7,0.021040000021457672
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,64,0,1,float16,fp8,7,0.02091199904680252
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,64,128,1,float16,float16,15,0.02128533273935318
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,64,0,1,float16,float16,15,0.021349333226680756
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,64,128,1,float16,fp8,15,0.020960000654061634
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,64,0,1,float16,fp8,15,0.021295999487241108
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,64,128,1,float16,fp8,31,0.021002667645613354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,64,0,1,float16,fp8,31,0.020917333662509918
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,64,128,1,float16,float16,63,0.021018666525681812
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,64,128,1,float16,float16,31,0.021007999777793884
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,64,0,1,float16,float16,31,0.02124800036350886
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,64,0,1,float16,float16,63,0.021221332252025604
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,64,128,1,float16,fp8,63,0.021269333859284718
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,64,0,1,float16,fp8,63,0.020917333662509918
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,64,128,1,float16,float16,127,0.021082667013009388
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,64,0,1,float16,float16,127,0.021183999876181286
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,64,128,1,float16,fp8,127,0.02102400114138921
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,64,0,1,float16,float16,255,0.021344001094500225
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,64,0,1,float16,fp8,127,0.02128533273935318
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,64,0,1,float16,fp8,255,0.02093333254257838
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,64,128,1,float16,float16,255,0.02144533395767212
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,64,128,1,float16,fp8,255,0.021082667013009388
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,64,128,1,float16,float16,511,0.021344001094500225
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,64,0,1,float16,float16,511,0.02924266705910365
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,64,0,1,float16,fp8,511,0.027274665733178455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,64,128,1,float16,fp8,511,0.021359999974568684
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,64,128,1,float16,float16,1,0.02741333345572154
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,64,0,1,float16,fp8,1,0.026698666314284008
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,64,0,1,float16,float16,1,0.029093332588672638
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,64,128,1,float16,fp8,1,0.027029333015282948
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,64,128,1,float16,fp8,3,0.0272533322374026
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,64,128,1,float16,float16,3,0.029205332199732464
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,64,0,1,float16,fp8,3,0.027221334477265675
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,64,0,1,float16,float16,7,0.027429332335789997
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,64,0,1,float16,float16,3,0.02734400083621343
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,64,128,1,float16,float16,7,0.027535999814669292
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,64,128,1,float16,fp8,7,0.027232001225153606
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,64,0,1,float16,fp8,7,0.02737066646416982
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,64,128,1,float16,float16,15,0.029258665939172108
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,64,0,1,float16,float16,15,0.029194665451844532
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,64,128,1,float16,fp8,15,0.02699733277161916
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,64,0,1,float16,fp8,15,0.027263998985290527
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,64,128,1,float16,float16,31,0.0276053324341774
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,64,0,1,float16,float16,31,0.027589333554108936
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,64,128,1,float16,fp8,31,0.027093333502610523
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,64,0,1,float16,fp8,31,0.02717866748571396
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,64,128,1,float16,float16,63,0.027850667635599773
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,64,0,1,float16,float16,63,0.029343999922275543
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,64,128,1,float16,fp8,63,0.02722666660944621
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,64,0,1,float16,fp8,63,0.027077332139015198
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,64,0,1,float16,fp8,127,0.0272533322374026
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,64,128,1,float16,float16,127,0.02754133443037669
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,64,0,1,float16,float16,255,0.029391999046007793
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,64,0,1,float16,float16,127,0.027808000644048054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,64,128,1,float16,fp8,127,0.02716800073782603
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,64,128,1,float16,float16,255,0.028570666909217834
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,64,128,1,float16,fp8,255,0.02754666656255722
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,64,0,1,float16,fp8,255,0.02697066714366277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,64,128,1,float16,float16,511,0.028442665934562683
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,64,0,1,float16,float16,511,0.04178666571776072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,64,128,1,float16,fp8,511,0.027295999228954315
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,64,0,1,float16,fp8,511,0.03733866661787033
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,64,128,1,float16,float16,1,0.04387733340263367
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,64,0,1,float16,float16,1,0.04371733466784159
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,64,128,1,float16,fp8,1,0.041759997606277466
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,64,0,1,float16,fp8,1,0.04132800052563349
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,64,128,1,float16,float16,3,0.04378666480382284
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,64,0,1,float16,float16,3,0.04557866851488749
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,64,0,1,float16,float16,7,0.04389866689840952
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,64,128,1,float16,fp8,3,0.04182933270931244
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,64,0,1,float16,fp8,3,0.04171200096607208
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,64,128,1,float16,float16,15,0.04560533165931702
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,64,128,1,float16,float16,7,0.044309332966804504
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,64,128,1,float16,fp8,7,0.041402667760849
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,64,0,1,float16,fp8,7,0.04168533285458883
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,64,0,1,float16,float16,31,0.04548266530036926
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,64,0,1,float16,float16,15,0.04573333263397217
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,64,128,1,float16,fp8,15,0.04155733436346054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,64,0,1,float16,fp8,15,0.04204266766707102
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,64,128,1,float16,float16,31,0.045567999283472695
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,64,128,1,float16,fp8,31,0.04181333382924398
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,64,0,1,float16,fp8,31,0.04159999887148539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,64,0,1,float16,fp8,63,0.04151466737190882
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,64,128,1,float16,float16,63,0.045653333266576133
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,64,0,1,float16,float16,63,0.04576000074545542
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,64,128,1,float16,fp8,63,0.04181866844495138
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,64,128,1,float16,float16,127,0.04561600089073181
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,64,0,1,float16,float16,127,0.04557866851488749
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,64,128,1,float16,fp8,127,0.04154666761557261
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,64,0,1,float16,fp8,127,0.0415786678592364
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,64,128,1,float16,float16,255,0.045552000403404236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,64,0,1,float16,float16,255,0.04730666677157084
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,64,128,1,float16,fp8,255,0.043706665436426796
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,64,0,1,float16,fp8,255,0.04224533339341482
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,64,128,1,float16,float16,511,0.04569066564242045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,64,0,1,float16,float16,511,0.06829333305358887
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,float16,1,0.012901333471139273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,64,128,1,float16,fp8,511,0.04354666670163473
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,float16,3,0.012821332861979803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,float16,1,0.013061333447694778
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,64,0,1,float16,fp8,511,0.062224000692367554
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,fp8,1,0.010138666878143946
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,fp8,1,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,float16,3,0.013167999684810638
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,fp8,3,0.009045333291093508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,fp8,3,0.008997333546479544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,float16,7,0.013221333424250284
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,float16,15,0.01310933381319046
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,float16,7,0.012821332861979803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,fp8,15,0.008992000172535578
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,fp8,7,0.008714666590094566
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,float16,31,0.013066666821638743
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,fp8,7,0.009045333291093508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,float16,15,0.012965332716703415
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,fp8,15,0.009039999917149544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,float16,31,0.013034666577974955
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,fp8,31,0.009082666908701261
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,fp8,31,0.008986666798591614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,float16,63,0.014885333677132925
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,float16,63,0.014922666052977243
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,fp8,63,0.011071999867757162
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,fp8,63,0.011120000233252844
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,float16,127,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,float16,127,0.014981333166360855
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,fp8,127,0.011242666592200598
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,fp8,127,0.01101333275437355
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,float16,255,0.014912000546852747
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,float16,255,0.014997333288192749
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,fp8,255,0.011055999745925268
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,fp8,255,0.012975999464591345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,float16,511,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,float16,511,0.01714133347074191
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,fp8,511,0.011168000598748526
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,fp8,511,0.013157332936922709
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,float16,1023,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,float16,1023,0.02295999974012375
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,fp8,1023,0.012821332861979803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,fp8,1023,0.018992000569899876
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,float16,2047,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,float16,2047,0.03307733436425527
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,fp8,2047,0.011087999989589056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,fp8,2047,0.02923733244339625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,float16,4095,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,float16,4095,0.054133335749308266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,fp8,4095,0.012479999413092932
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,fp8,4095,0.04974400003751119
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,float16,8191,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,float16,8191,0.09481066465377808
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,fp8,8191,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,fp8,8191,0.08871466914812724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,float16,16383,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,fp8,16383,0.012826666235923767
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,float16,16383,0.1770346760749817
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,fp8,16383,0.16795732577641806
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,float16,32767,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,float16,32767,0.34082667032877606
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,fp8,32767,0.012517333030700684
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,float16,1,0.008885333314538002
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,fp8,32767,0.33952534198760986
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,fp8,1,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,fp8,1,0.010911999891201654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,float16,1,0.009695999945203463
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,float16,65535,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,float16,3,0.009514666472872099
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,float16,3,0.010981333752473196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,fp8,3,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,fp8,3,0.0106133334338665
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,64,128,1,float16,fp8,65535,0.012901333471139273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,fp8,65535,0.7421546777089437
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,float16,7,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,float16,7,0.009141333401203156
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,fp8,7,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,64,0,1,float16,float16,65535,0.800335963567098
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,fp8,7,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,float16,15,0.009626666704813639
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,float16,15,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,fp8,15,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,fp8,15,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,float16,31,0.010922666639089584
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,float16,31,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,fp8,31,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,fp8,31,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,float16,63,0.010415999839703241
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,float16,63,0.010954666882753372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,fp8,63,0.010911999891201654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,fp8,63,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,float16,127,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,float16,127,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,fp8,127,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,fp8,127,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,float16,255,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,fp8,511,0.010640000303586325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,float16,255,0.010944000134865442
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,fp8,255,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,fp8,255,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,float16,511,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,float16,511,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,fp8,511,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,float16,2047,0.012997332960367203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,float16,1023,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,float16,1023,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,fp8,1023,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,float16,4095,0.012794667234023413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,fp8,1023,0.010965333630641302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,float16,2047,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,fp8,2047,0.012725333372751871
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,fp8,2047,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,float16,4095,0.015824000040690105
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,fp8,4095,0.012901333471139273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,fp8,4095,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,float16,8191,0.011114666859308878
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,float16,8191,0.018645333747069042
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,fp8,8191,0.012773333738247553
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,fp8,8191,0.017024000485738117
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,float16,16383,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,float16,16383,0.02183466653029124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,fp8,16383,0.012981332838535309
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,fp8,16383,0.021045332153638203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,float16,32767,0.012847999731699625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,float16,32767,0.02735999971628189
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,fp8,32767,0.012138667205969492
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,fp8,32767,0.025397333006064098
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,float16,1,0.008976000050703684
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,float16,1,0.008821333448092142
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,fp8,1,0.01027199998497963
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,fp8,1,0.008821333448092142
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,float16,65535,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,float16,3,0.009125333279371262
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,float16,3,0.009098666409651438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,float16,65535,0.04417600234349569
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,fp8,3,0.009381333366036415
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,64,128,1,float16,fp8,65535,0.012810666114091873
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,64,0,1,float16,fp8,65535,0.036714665591716766
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,float16,7,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,fp8,3,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,float16,7,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,fp8,7,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,fp8,7,0.009338666374484697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,float16,15,0.009002666920423508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,float16,15,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,fp8,15,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,fp8,15,0.009306666751702627
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,float16,31,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,float16,31,0.009674666449427605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,fp8,31,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,fp8,31,0.009162666896979014
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,float16,63,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,fp8,127,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,float16,63,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,fp8,63,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,fp8,63,0.009365333244204521
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,float16,127,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,float16,127,0.009733333562811216
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,fp8,127,0.009621333330869675
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,float16,255,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,float16,255,0.009002666920423508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,fp8,255,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,fp8,255,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,float16,511,0.00897066667675972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,float16,511,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,fp8,511,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,fp8,511,0.010656000425418219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,float16,1023,0.010650667051474253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,float16,1023,0.010581333190202713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,fp8,2047,0.014655999839305878
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,fp8,1023,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,fp8,1023,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,float16,2047,0.010970667004585266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,fp8,4095,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,float16,2047,0.014698666830857595
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,fp8,2047,0.012991999586423239
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,float16,4095,0.012752000242471695
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,float16,4095,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,fp8,4095,0.01268799975514412
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,float16,8191,0.012762666990359625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,float16,8191,0.01883200059334437
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,fp8,8191,0.013045333325862885
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,fp8,8191,0.01724799970785777
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,float16,16383,0.012608000387748083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,float16,16383,0.023370665808518726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,fp8,16383,0.012400000045696894
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,fp8,16383,0.02316266546646754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,float16,32767,0.012757333616415659
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,float16,32767,0.03948266555865606
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,fp8,32767,0.012869333227475485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,fp8,32767,0.03153600047032038
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,float16,1,0.009413333609700203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,float16,1,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,float16,65535,0.012954667210578918
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,fp8,1,0.010954666882753372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,float16,65535,0.06214400132497152
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,fp8,1,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,float16,3,0.009194666519761086
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,64,128,1,float16,fp8,65535,0.012693333129088083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,float16,3,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,64,0,1,float16,fp8,65535,0.05399466554323832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,fp8,3,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,fp8,3,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,float16,15,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,float16,7,0.009045333291093508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,float16,7,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,fp8,7,0.009472000102202097
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,fp8,7,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,float16,15,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,fp8,15,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,fp8,15,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,float16,63,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,float16,31,0.009114666531483332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,float16,31,0.009712000067035357
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,fp8,31,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,fp8,31,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,float16,63,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,fp8,63,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,fp8,63,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,float16,127,0.009114666531483332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,float16,255,0.010645333677530289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,fp8,255,0.011813333878914515
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,float16,127,0.00972800018886725
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,fp8,127,0.010144000252087912
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,fp8,127,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,float16,255,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,fp8,255,0.009349333122372627
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,float16,511,0.009248000259200731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,float16,1023,0.011066666493813196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,float16,511,0.011157333850860596
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,fp8,511,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,fp8,511,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,float16,1023,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,fp8,2047,0.012826666235923767
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,fp8,1023,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,fp8,1023,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,float16,2047,0.012773333738247553
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,float16,2047,0.015583999454975128
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,fp8,2047,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,float16,4095,0.011087999989589056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,float16,4095,0.018906666586796444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,fp8,8191,0.012842666357755661
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,fp8,4095,0.012842666357755661
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,fp8,4095,0.01709866647919019
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,float16,16383,0.039674667020638786
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,float16,8191,0.0120319997270902
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,float16,8191,0.02510400116443634
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,fp8,8191,0.021151999632517498
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,float16,16383,0.01118933285276095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,float16,32767,0.062234664956728615
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,fp8,16383,0.012831999609867731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,fp8,16383,0.031744000812371574
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,float16,32767,0.012901333471139273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,fp8,32767,0.012709333250919977
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,fp8,32767,0.05384533107280731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,float16,65535,0.10919466614723206
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,float16,65535,0.0129120002190272
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,64,0,1,float16,fp8,65535,0.09095999598503113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,64,128,1,float16,fp8,65535,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,1,64,128,1,float16,float16,1,0.3429439862569173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,1,64,0,1,float16,float16,1,0.34280534585316974
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,1,64,128,1,float16,fp8,1,0.27562665939331055
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,1,64,0,1,float16,fp8,1,0.27696533997853595
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,1,64,128,1,float16,float16,3,0.34299735228220624
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,1,64,0,1,float16,float16,3,0.34252798557281494
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,1,64,128,1,float16,fp8,3,0.27538132667541504
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,1,64,0,1,float16,fp8,3,0.27506667375564575
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,1,64,128,1,float16,float16,7,0.3531893491744995
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,1,64,0,1,float16,float16,7,0.35337066650390625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,1,64,128,1,float16,fp8,7,0.2853333353996277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,1,64,0,1,float16,fp8,7,0.2856480081876119
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,1,64,128,1,float16,float16,15,0.3617386817932129
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,1,64,0,1,float16,float16,15,0.36075735092163086
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,1,64,128,1,float16,fp8,15,0.297818660736084
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,1,64,0,1,float16,fp8,15,0.29783467451731366
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,1,64,128,1,float16,float16,31,0.43532268206278485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,1,64,0,1,float16,float16,31,0.4347093502680461
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,1,64,128,1,float16,fp8,31,0.38165334860483807
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,1,64,0,1,float16,fp8,31,0.38145601749420166
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,1,64,128,1,float16,float16,63,0.4371573527654012
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,1,64,0,1,float16,float16,63,0.4370773235956828
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,1,64,128,1,float16,fp8,63,0.38356268405914307
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,1,64,0,1,float16,fp8,63,0.38344534238179523
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,1,64,128,1,float16,float16,127,0.44094932079315186
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,1,64,0,1,float16,float16,127,0.44091200828552246
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,1,64,128,1,float16,fp8,127,0.38519465923309326
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,1,64,0,1,float16,fp8,127,0.3855466842651367
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,1,64,128,1,float16,float16,255,0.4408213297526042
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,1,64,0,1,float16,float16,255,0.5206506649653116
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,2,64,128,1,float16,float16,1,0.03356266766786575
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,1,64,128,1,float16,fp8,255,0.38493335247039795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,2,64,0,1,float16,float16,1,0.03365866591533025
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,1,64,0,1,float16,fp8,255,0.46192534764607746
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,2,64,128,1,float16,fp8,1,0.03324799984693527
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,2,64,0,1,float16,fp8,1,0.033258666594823204
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,2,64,128,1,float16,float16,3,0.03401600072781245
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,2,64,0,1,float16,float16,3,0.0335359995563825
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,2,64,128,1,float16,float16,7,0.033957332372665405
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,2,64,128,1,float16,fp8,3,0.03214933226505915
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,2,64,0,1,float16,fp8,3,0.033344000577926636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,2,64,0,1,float16,fp8,7,0.03325333446264267
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,2,64,0,1,float16,float16,7,0.033701332906881966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,2,64,128,1,float16,fp8,7,0.033226666351159416
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,2,64,128,1,float16,float16,15,0.03408533334732056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,2,64,0,1,float16,float16,15,0.03324799984693527
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,2,64,128,1,float16,fp8,15,0.033344000577926636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,2,64,0,1,float16,fp8,15,0.03331733246644338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,2,64,128,1,float16,float16,31,0.03366933266321818
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,2,64,0,1,float16,float16,31,0.03356266766786575
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,2,64,128,1,float16,fp8,31,0.031632001201311745
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,2,64,0,1,float16,fp8,31,0.03346133232116699
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,2,64,128,1,float16,float16,63,0.03323733309904734
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,2,64,0,1,float16,float16,63,0.03359466542800268
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,2,64,128,1,float16,fp8,63,0.033200000723203026
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,2,64,0,1,float16,fp8,63,0.03329599897066752
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,2,64,128,1,float16,float16,127,0.03430933256944021
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,2,64,0,1,float16,float16,127,0.033344000577926636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,2,64,128,1,float16,fp8,127,0.03324799984693527
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,2,64,0,1,float16,fp8,127,0.03145066648721695
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,2,64,128,1,float16,float16,255,0.03393599887688955
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,2,64,0,1,float16,float16,255,0.03379733363787333
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,2,64,128,1,float16,fp8,255,0.03333866596221924
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,2,64,0,1,float16,fp8,255,0.03350399931271871
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,4,64,128,1,float16,float16,1,0.047600001096725464
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,4,64,0,1,float16,float16,1,0.047637333472569786
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,4,64,128,1,float16,fp8,1,0.04398400088151296
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,4,64,0,1,float16,fp8,1,0.04394133388996124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,4,64,128,1,float16,float16,3,0.0476746658484141
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,4,64,0,1,float16,float16,3,0.04769066472848257
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,4,64,128,1,float16,fp8,3,0.043968002001444496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,4,64,0,1,float16,fp8,3,0.04378666480382284
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,4,64,128,1,float16,float16,7,0.048991998036702476
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,4,64,0,1,float16,float16,7,0.047653332352638245
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,4,64,128,1,float16,fp8,7,0.04390933116277059
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,4,64,0,1,float16,fp8,7,0.04364799956480662
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,4,64,128,1,float16,float16,15,0.047557334105173744
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,4,64,0,1,float16,float16,15,0.04782933493455251
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,4,64,128,1,float16,fp8,15,0.043605332573254905
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,4,64,0,1,float16,fp8,15,0.043978666265805565
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,4,64,0,1,float16,float16,31,0.04753600060939789
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,4,64,128,1,float16,float16,31,0.04780266682306925
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,4,64,128,1,float16,fp8,31,0.04385599990685781
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,4,64,0,1,float16,fp8,31,0.04349866509437561
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,4,64,128,1,float16,float16,63,0.04764799773693085
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,4,64,0,1,float16,float16,63,0.04752533137798309
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,4,64,128,1,float16,fp8,63,0.04375466704368591
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,4,64,0,1,float16,fp8,63,0.043935999274253845
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,4,64,128,1,float16,float16,127,0.04769066472848257
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,4,64,0,1,float16,float16,127,0.04765866696834564
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,4,64,128,1,float16,fp8,127,0.043578664461771645
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,4,64,0,1,float16,fp8,127,0.04387733340263367
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,4,64,128,1,float16,float16,255,0.04789333542188009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,4,64,0,1,float16,float16,255,0.04905066887537638
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,4,64,128,1,float16,fp8,255,0.045791998505592346
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,4,64,0,1,float16,fp8,255,0.0452106644709905
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,8,64,128,1,float16,float16,1,0.08067733546098073
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,8,64,0,1,float16,float16,1,0.08078933258851369
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,8,64,128,1,float16,fp8,1,0.07437333464622498
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,8,64,0,1,float16,fp8,1,0.07427200178305308
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,8,64,128,1,float16,float16,3,0.08060266574223836
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,8,64,128,1,float16,fp8,3,0.07453333338101704
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,8,64,0,1,float16,float16,3,0.08041599889596303
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,8,64,0,1,float16,fp8,3,0.07461866736412048
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,8,64,128,1,float16,float16,7,0.08043733239173889
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,8,64,0,1,float16,float16,7,0.0805920014778773
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,8,64,128,1,float16,fp8,7,0.07436800003051758
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,8,64,0,1,float16,fp8,7,0.07428800066312154
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,8,64,128,1,float16,float16,15,0.0806879997253418
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,8,64,0,1,float16,float16,15,0.0803306649128596
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,8,64,128,1,float16,fp8,15,0.07451733450094859
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,8,64,0,1,float16,fp8,15,0.0746666689713796
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,8,64,128,1,float16,float16,31,0.08053333560625713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,8,64,0,1,float16,float16,31,0.08044266700744629
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,8,64,128,1,float16,fp8,31,0.07454399764537811
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,8,64,0,1,float16,fp8,31,0.0743093341588974
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,8,64,128,1,float16,float16,63,0.0803466687599818
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,8,64,0,1,float16,float16,63,0.08042133351167043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,8,64,0,1,float16,fp8,63,0.07462400197982788
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,8,64,128,1,float16,fp8,63,0.07458666463692983
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,8,64,128,1,float16,float16,127,0.08235733211040497
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,8,64,0,1,float16,float16,127,0.08264533181985219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,8,64,128,1,float16,fp8,127,0.07427733143170674
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,8,64,0,1,float16,fp8,127,0.07452266911665599
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,8,64,128,1,float16,float16,255,0.08262933293978374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,8,64,0,1,float16,float16,255,0.0823466678460439
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,8,64,128,1,float16,fp8,255,0.07840533554553986
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,8,64,0,1,float16,fp8,255,0.07656000057856242
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,1,64,128,1,float16,float16,1,0.6784000396728516
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,1,64,0,1,float16,float16,1,0.6783093611399332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,1,64,128,1,float16,fp8,1,0.5463093519210815
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,1,64,0,1,float16,fp8,1,0.5466613372166952
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,1,64,128,1,float16,float16,3,0.6788533528645834
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,1,64,128,1,float16,fp8,3,0.546288013458252
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,1,64,0,1,float16,float16,3,0.6785333156585693
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,1,64,0,1,float16,fp8,3,0.5465493202209473
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,1,64,128,1,float16,float16,7,0.7000319957733154
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,1,64,0,1,float16,float16,7,0.6997120380401611
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,1,64,128,1,float16,fp8,7,0.5660586754480997
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,1,64,0,1,float16,fp8,7,0.5658880074818929
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,1,64,128,1,float16,float16,15,0.716042677561442
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,1,64,128,1,float16,fp8,15,0.589850664138794
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,1,64,0,1,float16,float16,15,0.71561066309611
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,1,64,0,1,float16,fp8,15,0.5901866753896078
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,1,64,128,1,float16,float16,31,0.8623466491699219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,1,64,0,1,float16,float16,31,0.8624587059020996
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,1,64,128,1,float16,fp8,31,0.7566773096720377
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,1,64,0,1,float16,fp8,31,0.7569493452707926
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,1,64,128,1,float16,float16,63,0.8675573666890463
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,1,64,0,1,float16,float16,63,0.8675626913706461
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,1,64,128,1,float16,fp8,63,0.7604586283365885
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,1,64,0,1,float16,fp8,63,0.760165294011434
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,1,64,128,1,float16,float16,127,0.8760639826456705
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,2,64,128,1,float16,float16,1,0.058559998869895935
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,2,64,0,1,float16,float16,1,0.05945600072542826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,1,64,0,1,float16,float16,127,0.8736053307851156
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,1,64,128,1,float16,fp8,127,0.7637653350830078
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,1,64,0,1,float16,fp8,127,0.7628373305002848
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,2,64,128,1,float16,fp8,1,0.05578133463859558
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,2,64,0,1,float16,fp8,1,0.05606399973233541
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,2,64,128,1,float16,float16,3,0.058378666639328
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,2,64,0,1,float16,float16,3,0.05910933514436086
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,2,64,128,1,float16,fp8,3,0.05597866574923197
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,2,64,0,1,float16,fp8,3,0.05587733288606008
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,2,64,128,1,float16,float16,7,0.05845866600672404
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,2,64,0,1,float16,float16,7,0.0582826683918635
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,2,64,128,1,float16,fp8,7,0.05605866511662801
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,2,64,0,1,float16,fp8,7,0.05603733162085215
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,2,64,128,1,float16,float16,15,0.05899199843406677
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,2,64,0,1,float16,float16,15,0.05952000121275584
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,2,64,128,1,float16,fp8,15,0.056176001826922096
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,2,64,0,1,float16,fp8,15,0.05579199890295664
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,2,64,128,1,float16,float16,31,0.058559998869895935
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,2,64,0,1,float16,float16,31,0.05801066756248474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,2,64,128,1,float16,fp8,31,0.056143999099731445
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,2,64,0,1,float16,fp8,31,0.05619200070699056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,2,64,128,1,float16,float16,63,0.058965335289637245
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,2,64,0,1,float16,float16,63,0.05983466903368632
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,2,64,128,1,float16,fp8,63,0.05590933561325073
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,2,64,0,1,float16,fp8,63,0.05573866764704386
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,2,64,128,1,float16,float16,127,0.05994133154551188
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,2,64,0,1,float16,float16,127,0.06065066655476888
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,2,64,128,1,float16,fp8,127,0.05590933561325073
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,2,64,0,1,float16,fp8,127,0.05644266804059347
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,4,64,128,1,float16,float16,1,0.0865280032157898
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,4,64,0,1,float16,float16,1,0.08658666412035625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,4,64,128,1,float16,fp8,1,0.07868266602357228
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,4,64,0,1,float16,fp8,1,0.07880533238252004
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,4,64,128,1,float16,float16,3,0.08655466636021932
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,4,64,0,1,float16,float16,3,0.08654399712880452
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,4,64,128,1,float16,fp8,3,0.0804319977760315
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,4,64,0,1,float16,fp8,3,0.08041066428025563
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,4,64,128,1,float16,float16,7,0.0865226686000824
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,4,64,0,1,float16,float16,7,0.08658132950464885
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,4,64,128,1,float16,fp8,7,0.07869866490364075
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,4,64,0,1,float16,fp8,7,0.08018666505813599
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,4,64,128,1,float16,float16,15,0.0864533285299937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,4,64,0,1,float16,float16,15,0.08679466446240743
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,4,64,128,1,float16,fp8,15,0.08081066608428955
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,4,64,0,1,float16,fp8,15,0.08037866652011871
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,4,64,128,1,float16,float16,31,0.08663466572761536
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,4,64,0,1,float16,float16,31,0.08681066830952962
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,4,64,128,1,float16,fp8,31,0.07904000083605449
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,4,64,0,1,float16,fp8,31,0.08038933575153351
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,4,64,128,1,float16,float16,63,0.08648533622423808
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,4,64,0,1,float16,float16,63,0.08635733524958293
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,4,64,128,1,float16,fp8,63,0.08055999875068665
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,4,64,0,1,float16,fp8,63,0.07997333506743114
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,4,64,128,1,float16,float16,127,0.08781333764394124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,4,64,0,1,float16,float16,127,0.08685866991678874
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,4,64,0,1,float16,fp8,127,0.0807360013326009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,8,64,128,1,float16,float16,1,0.15280000368754068
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,8,64,0,1,float16,float16,1,0.15401066342989603
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,8,64,128,1,float16,fp8,1,0.14050666491190592
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,8,64,0,1,float16,fp8,1,0.1400213340918223
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,8,64,0,1,float16,float16,3,0.15244799852371216
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,8,64,128,1,float16,fp8,3,0.14245866735776266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,8,64,128,1,float16,float16,7,0.15403200189272562
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,8,64,0,1,float16,float16,7,0.1530346671740214
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,8,64,128,1,float16,fp8,7,0.13991467157999674
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,8,64,128,1,float16,float16,15,0.15407466888427734
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,8,64,0,1,float16,float16,15,0.15405333042144775
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,8,64,128,1,float16,fp8,15,0.1400053302447001
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,8,64,0,1,float16,fp8,15,0.14004266262054443
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,8,64,0,1,float16,float16,31,0.15397866566975912
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,8,64,128,1,float16,fp8,31,0.1402186652024587
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,8,64,0,1,float16,fp8,31,0.13984533150990805
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,4,64,128,1,float16,fp8,127,0.08037333190441132
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,8,64,128,1,float16,float16,3,0.1541759967803955
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,8,64,128,1,float16,float16,63,0.15397866566975912
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,8,64,0,1,float16,float16,63,0.1530453364054362
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,8,64,128,1,float16,fp8,63,0.14010666807492575
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,8,64,0,1,float16,fp8,63,0.14075733224550882
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,float16,1,0.013088000317414602
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,8,64,128,1,float16,float16,127,0.15288533767064413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,8,64,0,1,float16,float16,127,0.15226133664449057
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,float16,1,0.012826666235923767
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,8,64,128,1,float16,fp8,127,0.14095466335614523
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,8,64,0,1,float16,fp8,3,0.14005333185195923
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,fp8,1,0.015392000476519266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,8,64,0,1,float16,fp8,7,0.14013866583506265
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,8,64,0,1,float16,fp8,127,0.14100799957911173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,fp8,1,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,float16,3,0.012762666990359625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,float16,3,0.013162666310866674
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,fp8,3,0.015024000157912573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,fp8,3,0.014885333677132925
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,float16,7,0.013125333935022354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,float16,7,0.012863999853531519
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,fp8,7,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,fp8,7,0.014858666807413101
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,8,64,128,1,float16,float16,31,0.1541813313961029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,float16,15,0.012885333349307379
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,float16,15,0.012837332983811697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,fp8,15,0.015013333410024643
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,fp8,15,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,float16,31,0.012991999586423239
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,float16,31,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,fp8,31,0.016506666938463848
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,fp8,31,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,float16,63,0.013167999684810638
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,float16,63,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,fp8,63,0.019354666272799175
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,fp8,63,0.019023999571800232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,float16,127,0.012960000584522883
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,float16,127,0.01301866645614306
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,fp8,255,0.019296000401178997
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,fp8,127,0.019237333287795384
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,fp8,127,0.01907733331123988
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,float16,255,0.01421333352724711
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,float16,255,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,fp8,255,0.01930133377512296
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,float16,511,0.013253333667914072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,float16,511,0.020901332298914593
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,fp8,511,0.019029332945744198
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,fp8,511,0.023050665855407715
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,float16,1023,0.013077333569526672
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,float16,1023,0.03152533372243246
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,fp8,1023,0.019248000035683315
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,float16,4095,0.013082666943470636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,fp8,1023,0.03350399931271871
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,float16,2047,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,float16,2047,0.05273599922657013
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,fp8,2047,0.019343999524911244
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,fp8,2047,0.05172266562779745
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,float16,4095,0.09494400024414062
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,fp8,4095,0.019248000035683315
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,fp8,8191,0.16686399777730307
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,fp8,4095,0.09035733342170715
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,float16,16383,0.015237333873907724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,float16,8191,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,float16,16383,0.3542826573053996
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,fp8,16383,0.01932799940307935
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,float16,8191,0.17893866697947183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,fp8,8191,0.01929066702723503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,fp8,16383,0.32128000259399414
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,float16,1,0.009973333527644476
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,float16,32767,0.01312000056107839
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,float16,1,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,fp8,1,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,64,128,1,float16,fp8,32767,0.019253333409627277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,fp8,3,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,fp8,1,0.010645333677530289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,float16,3,0.010533332824707031
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,float16,3,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,float16,32767,0.9754772981007894
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,64,0,1,float16,fp8,32767,0.6708532969156901
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,fp8,3,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,float16,7,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,float16,7,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,fp8,7,0.010645333677530289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,fp8,7,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,float16,15,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,float16,15,0.01062400018175443
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,fp8,15,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,fp8,15,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,float16,63,0.011525332927703857
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,float16,31,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,float16,31,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,fp8,31,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,fp8,31,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,float16,63,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,fp8,63,0.010890666395425797
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,fp8,63,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,float16,127,0.010970667004585266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,float16,127,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,fp8,127,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,fp8,127,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,float16,511,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,float16,255,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,float16,255,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,fp8,255,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,fp8,255,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,fp8,1023,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,fp8,1023,0.01259200026591619
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,float16,2047,0.012901333471139273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,float16,2047,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,float16,511,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,fp8,511,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,fp8,511,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,float16,4095,0.016783999900023144
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,float16,1023,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,float16,1023,0.01109333336353302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,fp8,4095,0.015557333827018738
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,fp8,2047,0.012837332983811697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,fp8,2047,0.014912000546852747
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,fp8,8191,0.01267733300725619
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,float16,4095,0.013007999708255133
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,fp8,4095,0.012837332983811697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,float16,8191,0.012997332960367203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,float16,8191,0.019098666807015736
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,fp8,8191,0.01941866676012675
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,float16,16383,0.012773333738247553
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,float16,16383,0.02717333287000656
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,fp8,16383,0.012762666990359625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,fp8,16383,0.025114665428797405
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,float16,32767,0.013007999708255133
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,float16,32767,0.04403733213742574
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,fp8,1,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,64,128,1,float16,fp8,32767,0.012874666601419449
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,float16,1,0.010608000059922537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,float16,1,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,fp8,1,0.011205332974592844
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,fp8,3,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,64,0,1,float16,fp8,32767,0.03739733248949051
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,float16,3,0.009018666421373686
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,float16,3,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,fp8,3,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,float16,7,0.010650667051474253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,float16,7,0.010928000013033548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,fp8,7,0.010911999891201654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,fp8,7,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,float16,15,0.00966933307548364
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,float16,31,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,float16,15,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,fp8,15,0.010890666395425797
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,fp8,15,0.010938666760921478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,float16,31,0.011066666493813196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,fp8,31,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,fp8,31,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,float16,63,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,float16,63,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,fp8,63,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,fp8,63,0.011141333729028702
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,float16,255,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,float16,127,0.010981333752473196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,float16,127,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,fp8,127,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,fp8,127,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,float16,255,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,fp8,511,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,fp8,255,0.010911999891201654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,fp8,255,0.010938666760921478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,float16,511,0.01090666651725769
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,float16,511,0.010938666760921478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,float16,2047,0.012970666090647379
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,fp8,511,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,float16,1023,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,float16,1023,0.012602667013804117
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,float16,4095,0.012890666723251343
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,fp8,1023,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,fp8,1023,0.011055999745925268
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,float16,2047,0.015402667224407196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,fp8,2047,0.01303999995191892
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,fp8,2047,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,float16,4095,0.018901333212852478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,fp8,4095,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,fp8,4095,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,float16,8191,0.012757333616415659
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,float16,8191,0.023738667368888855
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,fp8,8191,0.012794667234023413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,fp8,8191,0.02317333221435547
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,float16,16383,0.012810666114091873
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,float16,16383,0.04062400013208389
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,fp8,16383,0.012655999511480331
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,fp8,16383,0.03162133445342382
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,float16,32767,0.06331199904282887
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,float16,1,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,float16,32767,0.012858666479587555
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,float16,1,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,64,128,1,float16,fp8,32767,0.01309866706530253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,fp8,1,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,64,0,1,float16,fp8,32767,0.0539626677831014
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,fp8,1,0.010901333143313726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,float16,3,0.010944000134865442
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,float16,3,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,fp8,3,0.011349332829316458
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,fp8,3,0.010992000500361124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,float16,7,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,fp8,15,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,float16,7,0.012730666746695837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,fp8,7,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,fp8,7,0.01110400011142095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,float16,15,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,float16,15,0.010656000425418219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,fp8,15,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,float16,31,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,float16,31,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,fp8,31,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,fp8,31,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,float16,63,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,float16,63,0.011215999722480774
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,fp8,63,0.011071999867757162
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,fp8,63,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,float16,127,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,float16,127,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,fp8,127,0.010629333555698395
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,fp8,127,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,float16,255,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,float16,255,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,fp8,255,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,fp8,255,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,float16,1023,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,float16,1023,0.012815999488035837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,float16,511,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,float16,511,0.01097600037852923
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,fp8,511,0.011071999867757162
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,fp8,511,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,fp8,1023,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,fp8,1023,0.012666666259368261
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,float16,2047,0.01301866645614306
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,float16,4095,0.023183998962243397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,float16,2047,0.01806933308641116
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,fp8,2047,0.012858666479587555
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,fp8,2047,0.017290666699409485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,float16,4095,0.012810666114091873
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,fp8,4095,0.013066666821638743
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,fp8,4095,0.02123733361562093
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,float16,8191,0.013093333691358566
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,float16,8191,0.03984533250331879
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,fp8,8191,0.013023999830087027
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,fp8,8191,0.03162133445342382
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,float16,16383,0.012789333860079447
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,float16,16383,0.061199997862180076
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,fp8,16383,0.012831999609867731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,float16,32767,0.012805332740147909
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,fp8,16383,0.054058666030565895
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,fp8,1,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,fp8,3,0.014938666174809137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,float16,32767,0.10689600308736165
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,float16,1,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,64,128,1,float16,fp8,32767,0.01314666618903478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,fp8,15,0.017173333714405697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,float16,3,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,64,0,1,float16,fp8,32767,0.09132799506187439
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,float16,7,0.017237332959969837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,fp8,7,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,float16,127,0.021557333568731945
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,float16,15,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,float16,31,0.01922133316596349
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,fp8,31,0.017290666699409485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,float16,63,0.019061333189407986
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,fp8,63,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,fp8,127,0.02109333376089732
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,fp8,1023,0.08141866823037465
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,float16,255,0.03130666663249334
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,fp8,255,0.029626667499542236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,float16,511,0.048058668772379555
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,fp8,511,0.04649066428343455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,float16,1023,0.08426666259765625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,float16,2047,0.15414933363596597
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,fp8,2047,0.15036799510320029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,float16,4095,0.2974613308906555
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,fp8,4095,0.28933332363764447
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,float16,8191,0.5867520173390707
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,float16,1,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,fp8,8191,0.5663626591364542
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,float16,7,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,fp8,1,0.011120000233252844
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,float16,3,0.010618666807810465
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,float16,16383,1.4140747388203938
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,1,128,0,1,float16,fp8,16383,1.4895680745442708
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,fp8,3,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,fp8,7,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,fp8,63,0.010949333508809408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,float16,15,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,fp8,15,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,float16,31,0.011322667201360067
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,fp8,31,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,float16,63,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,float16,127,0.011120000233252844
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,fp8,127,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,fp8,1023,0.01302933320403099
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,float16,255,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,fp8,255,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,float16,511,0.012714666624863943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,fp8,511,0.012730666746695837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,float16,1023,0.01310933381319046
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,float16,2047,0.019002666076024372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,fp8,2047,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,float16,4095,0.023002666731675465
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,fp8,4095,0.021210665504137676
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,float16,8191,0.0395359992980957
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,fp8,8191,0.027679999669392902
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,float16,16383,0.060906668504079185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,float16,7,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,float16,1,0.01098666712641716
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,fp8,1,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,2,128,0,1,float16,fp8,16383,0.043621331453323364
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,float16,3,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,fp8,3,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,float16,63,0.010656000425418219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,fp8,7,0.01129066695769628
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,float16,15,0.011077333241701126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,fp8,15,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,float16,31,0.010949333508809408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,fp8,31,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,fp8,63,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,float16,127,0.010949333508809408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,fp8,127,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,float16,255,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,fp8,255,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,float16,511,0.011429333438475927
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,fp8,511,0.011381333072980246
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,float16,1023,0.012794667234023413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,fp8,4095,0.023386667172114056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,fp8,1023,0.012784000486135483
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,float16,2047,0.021007999777793884
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,fp8,8191,0.039103999733924866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,fp8,2047,0.018250666558742523
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,float16,4095,0.03734400123357773
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,float16,8191,0.05880533158779144
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,float16,1,0.012970666090647379
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,float16,16383,0.10045866171518962
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,fp8,1,0.012874666601419449
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,4,128,0,1,float16,fp8,16383,0.05950400233268738
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,float16,3,0.012506666282812754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,fp8,3,0.011066666493813196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,fp8,31,0.010928000013033548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,float16,7,0.012778667112191519
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,fp8,7,0.012202666451533636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,float16,15,0.012805332740147909
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,fp8,15,0.010885333021481832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,float16,31,0.012757333616415659
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,fp8,255,0.012821332861979803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,float16,63,0.012853333105643591
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,fp8,511,0.013125333935022354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,fp8,63,0.01101333275437355
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,float16,127,0.012629333883523941
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,fp8,127,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,float16,255,0.012954667210578918
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,float16,511,0.01498666654030482
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,float16,1023,0.018922666708628338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,fp8,1023,0.017050666113694508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,float16,2047,0.038773333032925926
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,fp8,2047,0.025093334416548412
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,float16,4095,0.059749335050582886
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,fp8,4095,0.03947199881076813
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,float16,8191,0.10192533334096272
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,fp8,8191,0.06163200239340464
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,float16,1,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,fp8,1,0.008965333302815756
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,float16,16383,0.18590933084487915
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,32,8,128,0,1,float16,fp8,16383,0.1029866635799408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,float16,3,0.008938666433095932
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,fp8,3,0.009162666896979014
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,float16,7,0.00873066671192646
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,fp8,7,0.009098666409651438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,float16,15,0.008698666468262672
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,fp8,15,0.009136000027259191
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,fp8,127,0.012965332716703415
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,float16,31,0.009072000160813332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,fp8,31,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,float16,63,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,fp8,63,0.01089599976936976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,float16,127,0.008757333581646284
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,float16,255,0.013130666067202887
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,float16,2047,0.02700799951950709
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,fp8,2047,0.0272533322374026
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,fp8,255,0.011178666104873022
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,float16,511,0.012970666090647379
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,fp8,511,0.017024000485738117
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,fp8,8191,0.0540533314148585
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,float16,1023,0.021040000021457672
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,fp8,1023,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,float16,4095,0.03760000069936117
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,fp8,4095,0.03775999943415324
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,float16,8191,0.05397333204746246
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,float16,16383,0.09098666906356812
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,float16,1,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,fp8,16383,0.08343467116355896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,float16,32767,0.16570666432380676
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,fp8,32767,0.144186665614446
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,float16,7,0.010650667051474253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,float16,65535,0.3141813278198242
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,fp8,1,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,float16,3,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,fp8,3,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,fp8,7,0.01101333275437355
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,float16,15,0.010608000059922537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,fp8,63,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,fp8,15,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,1,128,0,1,float16,fp8,65535,0.2688586711883545
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,float16,31,0.00890666681031386
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,fp8,31,0.009392000113924345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,float16,63,0.008986666798591614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,float16,127,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,fp8,127,0.010922666639089584
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,float16,255,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,fp8,255,0.010901333143313726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,float16,511,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,float16,4095,0.012752000242471695
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,fp8,511,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,float16,1023,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,fp8,1023,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,float16,2047,0.011120000233252844
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,fp8,2047,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,fp8,4095,0.012815999488035837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,float16,8191,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,fp8,8191,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,float16,32767,0.024351999163627625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,float16,16383,0.021007999777793884
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,fp8,16383,0.02089600016673406
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,fp8,32767,0.02311466634273529
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,float16,1,0.008863999818762144
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,fp8,1,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,float16,65535,0.025807999074459076
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,fp8,7,0.009082666908701261
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,float16,15,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,float16,3,0.009162666896979014
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,float16,31,0.00898133342464765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,fp8,3,0.009082666908701261
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,fp8,63,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,float16,63,0.009663999701539675
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,2,128,0,1,float16,fp8,65535,0.02550400048494339
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,float16,7,0.009653333574533463
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,fp8,15,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,fp8,31,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,float16,127,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,fp8,127,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,float16,255,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,fp8,255,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,float16,511,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,fp8,511,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,float16,1023,0.011701333026091257
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,fp8,1023,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,float16,2047,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,fp8,2047,0.010885333021481832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,float16,4095,0.013274667163689932
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,fp8,4095,0.011055999745925268
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,float16,8191,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,fp8,8191,0.015840000162522
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,float16,16383,0.01933866615096728
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,fp8,16383,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,float16,32767,0.02092266579469045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,fp8,32767,0.019098666807015736
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,float16,1,0.009018666421373686
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,fp8,1,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,float16,3,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,fp8,3,0.009258666386206945
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,float16,65535,0.023887999355793
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,float16,7,0.010250666489203772
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,fp8,31,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,4,128,0,1,float16,fp8,65535,0.021205333371957142
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,fp8,7,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,float16,15,0.008912000184257826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,fp8,127,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,fp8,15,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,float16,31,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,float16,63,0.010533332824707031
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,fp8,63,0.010634666929642359
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,float16,127,0.009637333452701569
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,float16,255,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,fp8,255,0.009450666606426239
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,float16,511,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,fp8,511,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,fp8,4095,0.013242666920026144
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,float16,1023,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,fp8,1023,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,float16,2047,0.011461333682139715
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,fp8,2047,0.01097600037852923
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,float16,4095,0.014757333944241205
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,float16,8191,0.01714666684468587
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,fp8,8191,0.015392000476519266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,float16,16383,0.018917333334684372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,fp8,16383,0.01709866647919019
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,float16,32767,0.02313599983851115
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,fp8,32767,0.0195573332409064
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,float16,1,0.008581333483258883
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,fp8,1,0.009103999783595404
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,float16,3,0.008602666358153025
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,fp8,3,0.009066666786869368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,float16,65535,0.03845333307981491
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,32,8,128,0,1,float16,fp8,65535,0.02502399931351344
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,float16,7,0.008709333216150602
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,fp8,7,0.009072000160813332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,float16,15,0.008922666932145754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,fp8,15,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,float16,31,0.00901333304742972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,fp8,31,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,float16,63,0.008714666590094566
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,fp8,63,0.012815999488035837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,float16,127,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,fp8,127,0.012901333471139273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,float16,255,0.013093333691358566
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,fp8,255,0.012901333471139273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,float16,511,0.02125866711139679
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,fp8,511,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,float16,1023,0.02533866713444392
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,fp8,1023,0.02736533433198929
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,float16,2047,0.033370666205883026
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,fp8,2047,0.035573333501815796
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,float16,4095,0.04805333415667216
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,fp8,4095,0.04795733094215393
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,float16,8191,0.08029333253701527
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,fp8,8191,0.07431999842325847
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,float16,16383,0.14085867007573447
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,fp8,16383,0.1255626678466797
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,float16,32767,0.2646453380584717
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,float16,1,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,fp8,32767,0.22781866788864136
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,fp8,1,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,float16,3,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,fp8,3,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,float16,7,0.010901333143313726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,float16,65535,0.5108960072199503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,fp8,7,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,float16,15,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,1,128,0,1,float16,fp8,65535,0.43106667200724286
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,fp8,15,0.01091733326514562
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,float16,31,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,fp8,127,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,fp8,31,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,float16,63,0.01101333275437355
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,fp8,63,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,float16,127,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,float16,255,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,fp8,255,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,float16,2047,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,fp8,2047,0.012853333105643591
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,float16,511,0.01210133358836174
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,fp8,511,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,float16,1023,0.01101333275437355
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,fp8,8191,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,fp8,1023,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,float16,4095,0.012784000486135483
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,fp8,16383,0.019098666807015736
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,fp8,4095,0.012906666845083237
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,float16,8191,0.01785600061217944
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,float16,16383,0.020421333611011505
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,float16,32767,0.02329600105683009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,float16,1,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,fp8,32767,0.020874666670958202
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,fp8,1,0.009877333417534828
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,float16,3,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,float16,15,0.009269333134094873
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,float16,65535,0.02500266581773758
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,fp8,3,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,float16,7,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,2,128,0,1,float16,fp8,65535,0.02316266546646754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,fp8,7,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,fp8,15,0.009472000102202097
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,float16,31,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,fp8,31,0.00966933307548364
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,float16,63,0.01044800008336703
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,fp8,63,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,float16,127,0.010938666760921478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,fp8,127,0.00892800030608972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,float16,255,0.010106666634480158
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,fp8,255,0.009279999881982803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,fp8,2047,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,float16,4095,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,float16,511,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,fp8,511,0.010634666929642359
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,float16,1023,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,float16,16383,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,fp8,1023,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,fp8,16383,0.017279999951521557
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,float16,2047,0.012800000607967377
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,float16,32767,0.023797333240509033
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,fp8,4095,0.014922666052977243
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,float16,8191,0.017103999853134155
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,fp8,8191,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,fp8,32767,0.020432000358899433
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,float16,1,0.009594666461149851
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,fp8,1,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,float16,3,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,fp8,3,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,float16,65535,0.039605334401130676
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,float16,7,0.009338666374484697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,4,128,0,1,float16,fp8,65535,0.02532266577084859
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,float16,63,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,fp8,63,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,fp8,7,0.009466666728258133
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,float16,15,0.009637333452701569
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,fp8,15,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,float16,31,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,fp8,31,0.009098666409651438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,float16,127,0.008949333180983862
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,fp8,127,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,float16,255,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,fp8,255,0.00943999985853831
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,float16,4095,0.016714667280515034
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,float16,511,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,fp8,511,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,float16,1023,0.011071999867757162
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,fp8,1023,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,float16,2047,0.012831999609867731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,fp8,2047,0.015029333531856537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,fp8,16383,0.019354666272799175
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,fp8,4095,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,float16,8191,0.01897066707412402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,fp8,8191,0.01648533344268799
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,float16,16383,0.022970666488011677
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,float16,32767,0.038549333810806274
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,fp8,32767,0.02508266766866048
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,float16,1,0.027162666122118633
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,fp8,1,0.023370665808518726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,float16,7,0.02740799884001414
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,float16,3,0.027445333699385326
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,fp8,65535,0.03755199909210205
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,32,8,128,0,1,float16,float16,65535,0.05789333085219065
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,fp8,3,0.02383466561635335
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,fp8,7,0.02533866713444392
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,float16,15,0.03163733333349228
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,fp8,15,0.02942399928967158
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,float16,31,0.03197866678237915
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,fp8,127,0.03527999917666117
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,fp8,31,0.029253333806991577
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,float16,63,0.031557333966096245
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,fp8,63,0.029487999776999157
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,fp8,511,0.08565866947174072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,float16,127,0.03781333317359289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,float16,255,0.05570666491985321
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,fp8,1023,0.15239999691645303
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,fp8,255,0.052069331208864846
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,float16,511,0.08880000313123067
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,float16,1023,0.15839999914169312
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,float16,2047,0.29765333731969196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,fp8,2047,0.2856053312619527
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,float16,1,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,fp8,4095,0.5537493228912354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,float16,4095,0.5827946662902832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,fp8,1,0.011055999745925268
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,float16,7,0.011018666128317514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,float16,3,0.011098666737476984
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,fp8,3,0.01110400011142095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,fp8,7,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,float16,15,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,float16,8191,1.1687626838684082
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,fp8,15,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,float16,127,0.010933333386977514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,float16,31,0.011168000598748526
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,1,128,0,1,float16,fp8,8191,1.0973440011342366
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,fp8,31,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,float16,63,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,fp8,63,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,fp8,127,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,fp8,1023,0.013077333569526672
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,float16,255,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,fp8,255,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,float16,511,0.012906666845083237
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,fp8,511,0.012800000607967377
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,float16,1023,0.013978666315476099
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,float16,2047,0.02109866589307785
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,fp8,2047,0.019296000401178997
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,float16,4095,0.03867733230193456
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,fp8,4095,0.026741333305835724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,float16,8191,0.0603359987338384
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,float16,1,0.012821332861979803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,fp8,1,0.01293333371480306
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,2,128,0,1,float16,fp8,8191,0.04348266621430715
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,float16,3,0.012719999998807907
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,fp8,3,0.010970667004585266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,float16,7,0.012831999609867731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,fp8,7,0.010949333508809408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,float16,15,0.012815999488035837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,float16,127,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,fp8,127,0.012752000242471695
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,fp8,15,0.011066666493813196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,float16,31,0.012863999853531519
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,fp8,31,0.011706666400035223
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,fp8,511,0.013002666334311167
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,float16,63,0.012666666259368261
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,fp8,1023,0.016554666062196095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,fp8,63,0.011066666493813196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,float16,2047,0.03754133234421412
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,fp8,2047,0.025231999655564625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,float16,255,0.012901333471139273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,fp8,255,0.012917333592971167
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,float16,511,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,float16,1023,0.019088000059127808
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,float16,4095,0.05967999994754791
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,fp8,4095,0.03961066653331121
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,float16,1,0.014901333798964819
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,float16,8191,0.10072533289591472
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,fp8,7,0.013546666751305262
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,fp8,1,0.014815999815861383
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,4,128,0,1,float16,fp8,8191,0.061808000008265175
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,float16,3,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,fp8,3,0.014778666198253632
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,float16,7,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,float16,15,0.01492799942692121
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,fp8,15,0.013258667041858038
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,fp8,127,0.013210666676362356
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,float16,31,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,fp8,31,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,float16,63,0.014965333044528961
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,fp8,511,0.01718933383623759
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,fp8,63,0.016122666498025257
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,float16,127,0.015018666783968607
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,float16,255,0.014938666174809137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,fp8,255,0.013050666699806849
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,float16,511,0.019381333142518997
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,float16,1023,0.03527999917666117
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,fp8,1023,0.02293866624434789
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,float16,2047,0.06015466650327047
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,fp8,2047,0.04065066576004028
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,float16,4095,0.1018933355808258
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,fp8,4095,0.062208001812299095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,float16,1,0.0086666668454806
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,float16,8191,0.18553600708643594
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,float16,7,0.008570666735370954
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,fp8,7,0.009434666484594345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,32,8,128,0,1,float16,fp8,8191,0.10481066505114238
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,fp8,1,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,float16,3,0.008912000184257826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,fp8,3,0.009045333291093508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,float16,15,0.008698666468262672
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,fp8,15,0.010954666882753372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,float16,31,0.008954666554927826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,fp8,31,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,float16,63,0.009130666653315226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,fp8,63,0.012778667112191519
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,float16,127,0.009093333035707474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,fp8,127,0.012698666503032049
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,float16,255,0.020975999534130096
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,fp8,255,0.012837332983811697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,float16,511,0.023168000082174938
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,fp8,511,0.02734400083621343
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,float16,1023,0.029653333127498627
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,fp8,1023,0.03136533250411352
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,float16,2047,0.041509332756201424
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,fp8,2047,0.04170133173465729
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,float16,4095,0.06410666803518932
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,float16,16383,0.2036799987157186
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,fp8,4095,0.06019733349482218
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,float16,8191,0.11147733529408772
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,fp8,8191,0.09929600358009338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,fp8,16383,0.17669334014256796
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,float16,1,0.010890666395425797
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,float16,32767,0.38893866539001465
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,fp8,1,0.010970667004585266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,fp8,32767,0.3288639982541402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,float16,3,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,fp8,3,0.011146667102972666
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,float16,7,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,fp8,7,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,fp8,65535,0.6378986835479736
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,float16,15,0.010656000425418219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,fp8,15,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,1,128,0,1,float16,float16,65535,0.7619413534800211
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,float16,31,0.010645333677530289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,float16,255,0.010954666882753372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,fp8,31,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,float16,63,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,fp8,63,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,float16,127,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,fp8,127,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,fp8,255,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,float16,511,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,float16,4095,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,fp8,511,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,float16,8191,0.01903466631968816
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,float16,1023,0.011183999478816986
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,fp8,1023,0.011141333729028702
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,float16,2047,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,fp8,2047,0.013829333086808523
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,fp8,4095,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,fp8,8191,0.018917333334684372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,float16,16383,0.020975999534130096
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,fp8,16383,0.0210506667693456
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,float16,32767,0.02537599951028824
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,fp8,32767,0.02426133304834366
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,float16,1,0.008933333059151968
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,fp8,1,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,float16,3,0.00895999992887179
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,fp8,65535,0.029301332930723827
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,2,128,0,1,float16,float16,65535,0.0432533323764801
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,fp8,3,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,float16,7,0.009450666606426239
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,float16,63,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,fp8,7,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,float16,15,0.008933333059151968
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,fp8,15,0.011061333119869232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,float16,31,0.009050666665037474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,fp8,31,0.009370666618148485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,fp8,63,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,float16,127,0.008922666932145754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,fp8,127,0.010911999891201654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,float16,255,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,float16,2047,0.014991999914248785
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,fp8,255,0.009365333244204521
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,float16,4095,0.018474667022625606
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,float16,511,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,fp8,511,0.011018666128317514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,float16,1023,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,fp8,1023,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,fp8,2047,0.014874666929244995
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,fp8,4095,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,float16,8191,0.018976000448067982
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,fp8,8191,0.019029332945744198
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,float16,16383,0.024085332949956257
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,fp8,16383,0.02125866711139679
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,float16,32767,0.03972800076007843
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,fp8,32767,0.02537599951028824
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,float16,1,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,fp8,1,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,float16,65535,0.05925333499908447
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,float16,3,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,fp8,3,0.009429333110650381
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,4,128,0,1,float16,fp8,65535,0.039333333571751915
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,float16,7,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,fp8,7,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,float16,15,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,fp8,15,0.00956266683836778
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,float16,31,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,fp8,31,0.009066666786869368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,float16,63,0.008949333180983862
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,fp8,63,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,float16,127,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,fp8,127,0.011109333485364914
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,float16,255,0.009285333255926767
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,fp8,255,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,float16,511,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,fp8,511,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,float16,1023,0.011792000383138657
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,fp8,1023,0.011178666104873022
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,float16,2047,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,fp8,2047,0.014943999548753103
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,float16,4095,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,float16,16383,0.037471999724706016
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,fp8,4095,0.015146666516860327
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,float16,8191,0.021189334491888683
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,fp8,8191,0.01889066646496455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,fp8,32767,0.03941333293914795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,fp8,16383,0.023152001202106476
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,float16,32767,0.05931733548641205
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,float16,1,0.047728002071380615
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,fp8,1,0.03982933362325033
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,float16,3,0.04974400003751119
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,fp8,3,0.04141333450873693
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,float16,65535,0.10114666819572449
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,float16,7,0.04997866849104563
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,fp8,31,0.053583999474843345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,32,8,128,0,1,float16,fp8,65535,0.06017066538333893
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,float16,63,0.06012799839178721
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,fp8,7,0.04365866879622141
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,float16,15,0.06005866825580597
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,fp8,15,0.05201066533724467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,float16,31,0.059978668888409935
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,fp8,63,0.053770666321118675
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,float16,127,0.07020799815654755
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,fp8,127,0.06442133088906606
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,float16,255,0.10331733028093974
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,fp8,255,0.09714667002360027
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,float16,511,0.1726613243420919
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,fp8,511,0.16237866878509521
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,float16,1023,0.30984000364939374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,fp8,1023,0.29368533690770465
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,float16,2047,0.5913333495457967
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,fp8,2047,0.5561973253885905
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,float16,1,0.013829333086808523
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,fp8,1,0.01301866645614306
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,float16,3,0.01310933381319046
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,fp8,3,0.013178666432698568
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,float16,15,0.013408000270525614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,float16,7,0.012847999731699625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,float16,4095,1.1633333365122478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,fp8,7,0.013125333935022354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,1,128,0,1,float16,fp8,4095,1.0924479961395264
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,fp8,15,0.01310933381319046
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,float16,31,0.013487999637921652
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,fp8,31,0.012800000607967377
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,fp8,255,0.012906666845083237
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,float16,63,0.013877333452304205
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,fp8,63,0.012997332960367203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,float16,127,0.013237333546082178
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,fp8,127,0.012954667210578918
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,float16,255,0.01349866638580958
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,float16,511,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,fp8,511,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,float16,1023,0.01950399950146675
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,float16,4095,0.06190933287143707
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,fp8,1023,0.019002666076024372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,fp8,2047,0.02790933350721995
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,float16,1,0.015013333410024643
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,fp8,4095,0.044213334719340004
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,fp8,1,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,float16,3,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,fp8,3,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,float16,31,0.01504533365368843
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,float16,7,0.01524266724785169
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,float16,63,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,fp8,7,0.014762666076421738
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,float16,15,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,fp8,15,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,fp8,31,0.014736000448465347
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,fp8,63,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,float16,127,0.015541333705186844
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,fp8,127,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,float16,255,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,fp8,255,0.014922666052977243
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,float16,511,0.019029332945744198
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,fp8,511,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,float16,1023,0.03516799956560135
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,fp8,1023,0.023370665808518726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,float16,2047,0.06051200131575266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,fp8,2047,0.041349334021409355
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,float16,4095,0.10288533568382263
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,float16,1,0.020901332298914593
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,fp8,1,0.01894933357834816
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,4,128,0,1,float16,fp8,4095,0.06286933521429698
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,float16,3,0.021104000508785248
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,float16,15,0.020714666694402695
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,fp8,3,0.019765333582957584
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,fp8,15,0.01714133347074191
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,float16,7,0.019413333386182785
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,fp8,7,0.019018666197856266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,float16,31,0.021061333517233532
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,2,128,0,1,float16,float16,2047,0.03938133269548416
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,fp8,31,0.018986667195955913
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,float16,63,0.02091199904680252
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,fp8,63,0.018746666610240936
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,fp8,255,0.018992000569899876
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,float16,127,0.01932799940307935
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,fp8,127,0.018944000204404194
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,float16,255,0.020202666521072388
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,float16,511,0.03366400053103765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,fp8,511,0.024122667809327442
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,float16,1023,0.053690666953722634
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,fp8,1023,0.03589866558710734
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,float16,2047,0.0957973301410675
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,fp8,2047,0.059989333152770996
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,float16,4095,0.1732693314552307
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,32,8,128,0,1,float16,fp8,4095,0.09604266285896301
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,128,0,1,float16,float16,1,0.08861866593360901
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,128,0,1,float16,fp8,1,0.07456533114115398
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,128,0,1,float16,float16,3,0.0913759966691335
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,128,0,1,float16,fp8,3,0.0766293356815974
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,128,0,1,float16,fp8,7,0.08059733112653096
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,128,0,1,float16,float16,7,0.09311999877293904
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,128,0,1,float16,float16,15,0.11283733447392781
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,128,0,1,float16,fp8,15,0.09922666351000468
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,128,0,1,float16,float16,31,0.11348799864451091
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,128,0,1,float16,fp8,31,0.10086933771769206
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,128,0,1,float16,float16,63,0.11353600025177002
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,128,0,1,float16,fp8,63,0.10096533099810283
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,128,0,1,float16,float16,127,0.1339359978834788
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,128,0,1,float16,fp8,127,0.12185066938400269
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,128,0,1,float16,float16,255,0.20122132698694864
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,128,0,1,float16,fp8,255,0.18683733542760214
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,128,0,1,float16,float16,511,0.3346773386001587
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,128,0,1,float16,fp8,511,0.31798932949701947
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,128,0,1,float16,float16,1,0.01706133286158244
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,128,0,1,float16,fp8,1,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,128,0,1,float16,float16,1023,0.6199680169423422
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,128,0,1,float16,fp8,7,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,1,128,0,1,float16,fp8,1023,0.576314647992452
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,128,0,1,float16,float16,3,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,128,0,1,float16,float16,31,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,128,0,1,float16,fp8,3,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,128,0,1,float16,float16,63,0.017242666333913803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,128,0,1,float16,float16,7,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,128,0,1,float16,float16,15,0.01727466657757759
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,128,0,1,float16,fp8,15,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,128,0,1,float16,fp8,31,0.016794666647911072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,128,0,1,float16,fp8,63,0.016773333152135212
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,128,0,1,float16,float16,127,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,128,0,1,float16,fp8,127,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,128,0,1,float16,float16,255,0.017231999586025875
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,128,0,1,float16,fp8,1023,0.02720533311367035
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,128,0,1,float16,fp8,255,0.016832000265518825
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,128,0,1,float16,float16,511,0.02128000060717265
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,128,0,1,float16,fp8,511,0.02110933264096578
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,2,128,0,1,float16,float16,1023,0.03595199932654699
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,128,0,1,float16,float16,1,0.02091199904680252
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,128,0,1,float16,fp8,1,0.019061333189407986
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,128,0,1,float16,float16,3,0.021253332495689392
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,128,0,1,float16,fp8,3,0.019296000401178997
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,128,0,1,float16,float16,31,0.020970667401949566
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,128,0,1,float16,float16,7,0.020917333662509918
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,128,0,1,float16,fp8,7,0.018922666708628338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,128,0,1,float16,float16,15,0.02111999938885371
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,128,0,1,float16,fp8,15,0.01877333347996076
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,128,0,1,float16,fp8,31,0.018986667195955913
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,128,0,1,float16,float16,63,0.021168000996112823
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,128,0,1,float16,fp8,63,0.019061333189407986
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,128,0,1,float16,float16,127,0.02093333254257838
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,128,0,1,float16,fp8,127,0.019152000546455383
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,128,0,1,float16,float16,1023,0.05401599903901418
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,128,0,1,float16,float16,255,0.02091199904680252
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,128,0,1,float16,fp8,255,0.019280000279347103
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,128,0,1,float16,float16,511,0.034645333886146545
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,128,0,1,float16,fp8,511,0.025120000044504803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,4,128,0,1,float16,fp8,1023,0.03701333453257879
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,128,0,1,float16,float16,1,0.029418667157491047
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,128,0,1,float16,fp8,1,0.02717866748571396
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,128,0,1,float16,float16,3,0.029530666768550873
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,128,0,1,float16,fp8,3,0.027215999861558277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,128,0,1,float16,float16,7,0.030373332401116688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,128,0,1,float16,fp8,31,0.02719466636578242
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,128,0,1,float16,fp8,7,0.02703999976317088
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,128,0,1,float16,float16,15,0.029557332396507263
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,128,0,1,float16,fp8,15,0.02718399961789449
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,128,0,1,float16,float16,31,0.029253333806991577
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,128,0,1,float16,float16,63,0.02951466788848241
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,128,0,1,float16,float16,255,0.0336053321758906
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,128,0,1,float16,fp8,255,0.027162666122118633
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,128,0,1,float16,fp8,63,0.027274665733178455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,128,0,1,float16,float16,127,0.02992533395687739
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,128,0,1,float16,fp8,127,0.027056001126766205
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,128,0,1,float16,float16,1023,0.09468799829483032
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,128,0,1,float16,float16,511,0.05541333556175232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,128,0,1,float16,fp8,511,0.03975466638803482
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,32,8,128,0,1,float16,fp8,1023,0.05819199979305267
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,128,0,1,float16,float16,1,0.17060265938440958
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,128,0,1,float16,fp8,1,0.14225066701571146
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,128,0,1,float16,float16,3,0.17669866482416788
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,128,0,1,float16,fp8,3,0.14807466665903726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,128,0,1,float16,float16,7,0.18075732390085855
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,128,0,1,float16,float16,15,0.2197386622428894
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,128,0,1,float16,fp8,7,0.1541866660118103
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,128,0,1,float16,fp8,15,0.1930720011393229
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,128,0,1,float16,float16,31,0.22004799048105875
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,128,0,1,float16,fp8,31,0.19301867485046387
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,128,0,1,float16,float16,63,0.22181334098180136
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,128,0,1,float16,fp8,63,0.19412267208099365
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,128,0,1,float16,float16,127,0.26057066520055133
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,128,0,1,float16,fp8,127,0.23778667052586874
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,128,0,1,float16,float16,255,0.3959466616312663
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,128,0,1,float16,fp8,255,0.3656160036722819
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,128,0,1,float16,float16,1,0.025146665672461193
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,128,0,1,float16,fp8,1,0.023024000227451324
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,128,0,1,float16,float16,511,0.6782879829406738
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,128,0,1,float16,float16,3,0.025120000044504803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,1,128,0,1,float16,fp8,511,0.6230613390604655
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,128,0,1,float16,fp8,3,0.023130667706330616
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,128,0,1,float16,float16,15,0.025055999557177227
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,128,0,1,float16,float16,7,0.02531733363866806
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,128,0,1,float16,fp8,7,0.023386667172114056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,128,0,1,float16,fp8,15,0.023178666830062866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,128,0,1,float16,float16,31,0.025125332176685333
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,128,0,1,float16,fp8,31,0.02312533309062322
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,128,0,1,float16,float16,127,0.025077333052953083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,128,0,1,float16,float16,63,0.02510933329661687
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,128,0,1,float16,fp8,255,0.02327999969323476
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,128,0,1,float16,fp8,63,0.023018665611743927
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,128,0,1,float16,fp8,127,0.023423999547958374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,128,0,1,float16,float16,255,0.02495466669400533
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,128,0,1,float16,float16,511,0.038160001238187156
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,2,128,0,1,float16,fp8,511,0.029477333029111225
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,128,0,1,float16,float16,1,0.03332799921433131
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,128,0,1,float16,fp8,1,0.0295413335164388
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,128,0,1,float16,float16,3,0.03320533285538355
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,128,0,1,float16,fp8,3,0.029450667401154835
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,128,0,1,float16,float16,7,0.03152533372243246
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,128,0,1,float16,fp8,7,0.029232000311215717
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,128,0,1,float16,float16,15,0.03335466732581457
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,128,0,1,float16,fp8,15,0.029487999776999157
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,128,0,1,float16,float16,31,0.033226666351159416
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,128,0,1,float16,fp8,31,0.02961066613594691
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,128,0,1,float16,fp8,127,0.02938133229811986
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,128,0,1,float16,float16,63,0.03331200033426285
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,128,0,1,float16,fp8,63,0.02941333254178365
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,128,0,1,float16,float16,127,0.03214933226505915
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,128,0,1,float16,float16,255,0.03745066622893015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,128,0,1,float16,fp8,255,0.0295413335164388
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,128,0,1,float16,float16,511,0.05813866853713989
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,128,0,1,float16,float16,1,0.05193066596984863
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,4,128,0,1,float16,fp8,511,0.04206933577855428
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,128,0,1,float16,fp8,1,0.045461331804593406
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,128,0,1,float16,fp8,7,0.04557333389918009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,128,0,1,float16,float16,3,0.05231466889381409
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,128,0,1,float16,fp8,3,0.045706664522488914
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,128,0,1,float16,float16,31,0.05179733534653982
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,128,0,1,float16,float16,7,0.05169066786766052
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,128,0,1,float16,float16,63,0.05179733534653982
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,128,0,1,float16,float16,15,0.051685333251953125
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,128,0,1,float16,fp8,15,0.04543466866016388
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,128,0,1,float16,fp8,31,0.045552000403404236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,128,0,1,float16,fp8,63,0.04584533472855886
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,128,0,1,float16,float16,127,0.0525439977645874
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,128,0,1,float16,fp8,127,0.04549333453178406
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,128,0,1,float16,float16,255,0.057946667075157166
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,128,0,1,float16,fp8,255,0.0479360024134318
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,float16,1,0.012800000607967377
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,float16,7,0.012890666723251343
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,128,0,1,float16,float16,511,0.09776000181833903
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,fp8,1,0.008992000172535578
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,32,8,128,0,1,float16,fp8,511,0.07009600102901459
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,float16,31,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,fp8,31,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,float16,3,0.012917333592971167
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,fp8,3,0.009077333534757296
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,fp8,7,0.00898133342464765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,float16,15,0.013061333447694778
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,float16,255,0.017237332959969837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,fp8,15,0.009029333169261614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,float16,63,0.014757333944241205
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,fp8,63,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,float16,127,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,fp8,127,0.012725333372751871
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,fp8,255,0.013130666067202887
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,float16,2047,0.053685332338015236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,float16,511,0.021216000119845074
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,fp8,511,0.01926400015751521
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,float16,1023,0.033200000723203026
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,fp8,1023,0.02942399928967158
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,fp8,2047,0.04971733192602793
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,float16,4095,0.09473066528638203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,fp8,4095,0.08860799670219421
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,float16,8191,0.17658666769663492
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,fp8,8191,0.16656532883644104
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,float16,16383,0.34089601039886475
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,fp8,16383,0.32344533999760944
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,float16,1,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,fp8,1,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,float16,32767,0.6896426677703857
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,float16,3,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,fp8,3,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,fp8,32767,0.7310986518859863
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,float16,7,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,fp8,7,0.01098666712641716
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,float16,15,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,fp8,15,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,float16,31,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,fp8,31,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,float16,63,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,fp8,63,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,float16,127,0.01055466632048289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,fp8,127,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,float16,255,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,float16,65535,1.7520853678385417
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,fp8,255,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,1,128,0,1,float16,fp8,65535,1.5552372932434082
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,float16,511,0.011861333002646765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,fp8,511,0.01091733326514562
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,float16,1023,0.011077333241701126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,fp8,1023,0.011120000233252844
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,float16,2047,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,fp8,2047,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,float16,4095,0.018858666221300762
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,fp8,4095,0.017231999586025875
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,float16,8191,0.020917333662509918
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,fp8,8191,0.018911999960740406
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,float16,32767,0.04144533226887385
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,float16,16383,0.025087999800841015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,fp8,16383,0.023050665855407715
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,float16,1,0.010949333508809408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,fp8,32767,0.027429332335789997
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,fp8,1,0.0103946669648091
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,float16,3,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,float16,15,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,fp8,3,0.01108266661564509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,float16,7,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,fp8,7,0.011087999989589056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,float16,65535,0.06157866617043813
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,2,128,0,1,float16,fp8,65535,0.04417600234349569
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,fp8,15,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,float16,31,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,fp8,31,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,float16,63,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,fp8,63,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,float16,127,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,fp8,127,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,float16,255,0.010965333630641302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,fp8,255,0.009717333440979322
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,float16,511,0.011098666737476984
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,fp8,511,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,float16,1023,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,fp8,1023,0.011071999867757162
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,float16,2047,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,fp8,2047,0.014981333166360855
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,float16,4095,0.01710933322707812
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,fp8,4095,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,float16,8191,0.02107200026512146
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,fp8,8191,0.019258666783571243
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,float16,16383,0.03733866661787033
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,fp8,16383,0.023344000180562336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,float16,32767,0.0582239975531896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,fp8,32767,0.039503999054431915
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,float16,1,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,fp8,1,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,float16,3,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,fp8,3,0.010885333021481832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,float16,65535,0.10082133611043294
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,float16,7,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,fp8,31,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,4,128,0,1,float16,fp8,65535,0.06133866806825002
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,fp8,7,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,float16,15,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,fp8,15,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,float16,31,0.01022933361430963
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,float16,63,0.010634666929642359
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,fp8,63,0.012047999848922094
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,float16,127,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,fp8,127,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,fp8,1023,0.012991999586423239
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,float16,255,0.00943999985853831
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,fp8,2047,0.016757333030303318
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,fp8,255,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,float16,511,0.011077333241701126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,fp8,511,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,float16,1023,0.012815999488035837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,float16,2047,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,float16,4095,0.02117866774400075
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,fp8,4095,0.019274666905403137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,float16,8191,0.03738133360942205
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,fp8,8191,0.02493866781393687
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,float16,16383,0.05957333246866862
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,fp8,16383,0.039621333281199135
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,fp8,32767,0.062021334966023765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,float16,32767,0.10194666186968486
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,float16,65535,0.18522665898005167
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,1,128,0,1,float16,float16,1,0.33401068051656085
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,1,128,0,1,float16,fp8,1,0.27911466360092163
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,32,8,128,0,1,float16,fp8,65535,0.10373866558074951
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,1,128,0,1,float16,float16,3,0.34882664680480957
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,1,128,0,1,float16,fp8,3,0.28940266370773315
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,1,128,0,1,float16,float16,7,0.3570079803466797
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,1,128,0,1,float16,fp8,7,0.3038986722628276
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,1,128,0,1,float16,float16,15,0.43297600746154785
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,1,128,0,1,float16,fp8,15,0.37970133622487384
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,1,128,0,1,float16,float16,31,0.4339840014775594
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,1,128,0,1,float16,fp8,31,0.3813386758168538
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,1,128,0,1,float16,float16,63,0.43675732612609863
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,1,128,0,1,float16,fp8,63,0.3819466829299927
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,1,128,0,1,float16,float16,127,0.5148213307062784
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,1,128,0,1,float16,fp8,127,0.46747199694315594
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,2,128,0,1,float16,float16,1,0.0397173340121905
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,2,128,0,1,float16,fp8,1,0.03748800108830134
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,2,128,0,1,float16,float16,3,0.04080000023047129
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,1,128,0,1,float16,float16,255,0.811247984568278
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,1,128,0,1,float16,fp8,255,0.7233493328094482
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,2,128,0,1,float16,fp8,3,0.03730133424202601
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,2,128,0,1,float16,float16,7,0.039813332259655
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,2,128,0,1,float16,fp8,7,0.037402667105197906
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,2,128,0,1,float16,float16,15,0.03951466580231985
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,2,128,0,1,float16,fp8,15,0.03751466671625773
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,2,128,0,1,float16,float16,31,0.03951466580231985
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,2,128,0,1,float16,fp8,31,0.037647999823093414
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,2,128,0,1,float16,float16,63,0.0401706670721372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,2,128,0,1,float16,fp8,63,0.037317333122094475
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,2,128,0,1,float16,float16,127,0.04074666649103165
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,2,128,0,1,float16,fp8,127,0.03742400060097376
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,2,128,0,1,float16,float16,255,0.04282666742801666
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,4,128,0,1,float16,float16,1,0.05605866511662801
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,2,128,0,1,float16,fp8,255,0.039450667798519135
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,4,128,0,1,float16,fp8,1,0.04976533353328705
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,4,128,0,1,float16,float16,3,0.057631999254226685
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,4,128,0,1,float16,fp8,3,0.049829334020614624
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,4,128,0,1,float16,float16,7,0.05612266560395559
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,4,128,0,1,float16,fp8,7,0.04995200037956238
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,4,128,0,1,float16,float16,15,0.056101332108179726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,4,128,0,1,float16,fp8,15,0.04906133313973745
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,4,128,0,1,float16,float16,31,0.05788266658782959
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,4,128,0,1,float16,fp8,31,0.049642667174339294
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,4,128,0,1,float16,float16,63,0.05682666599750519
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,4,128,0,1,float16,fp8,63,0.04957866668701172
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,4,128,0,1,float16,float16,127,0.05825066566467285
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,4,128,0,1,float16,fp8,127,0.05021866659323374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,4,128,0,1,float16,float16,255,0.06193066636721293
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,4,128,0,1,float16,fp8,255,0.05175999800364176
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,8,128,0,1,float16,fp8,1,0.08205333352088928
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,8,128,0,1,float16,float16,1,0.09507733583450317
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,8,128,0,1,float16,float16,3,0.09541333715120952
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,8,128,0,1,float16,fp8,3,0.08239999910195668
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,8,128,0,1,float16,float16,7,0.09506133198738098
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,8,128,0,1,float16,fp8,7,0.08102400104204814
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,8,128,0,1,float16,float16,15,0.09485866626103719
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,8,128,0,1,float16,fp8,15,0.08080533146858215
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,8,128,0,1,float16,float16,31,0.09489066402117412
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,8,128,0,1,float16,fp8,31,0.08184533317883809
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,8,128,0,1,float16,float16,63,0.09491200248400371
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,8,128,0,1,float16,fp8,63,0.08142399787902832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,8,128,0,1,float16,float16,127,0.09515200058619182
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,8,128,0,1,float16,fp8,127,0.0827466646830241
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,8,128,0,1,float16,float16,255,0.10282666484514873
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,32,8,128,0,1,float16,fp8,255,0.08357866605122884
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,1,128,0,1,float16,float16,1,0.6599466800689697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,1,128,0,1,float16,fp8,1,0.5495893160502116
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,1,128,0,1,float16,float16,3,0.6929546991984049
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,1,128,0,1,float16,fp8,3,0.5724106629689535
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,1,128,0,1,float16,float16,7,0.7074986298878988
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,1,128,0,1,float16,fp8,7,0.6008373498916626
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,1,128,0,1,float16,float16,15,0.8611146608988444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,1,128,0,1,float16,fp8,15,0.7539040247599283
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,1,128,0,1,float16,float16,31,0.8624959786732992
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,1,128,0,1,float16,fp8,31,0.756223996480306
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,1,128,0,1,float16,float16,63,0.8731839656829834
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,2,128,0,1,float16,float16,1,0.07233599821726482
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,2,128,0,1,float16,fp8,1,0.06608533362547557
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,1,128,0,1,float16,fp8,63,0.7580107053120931
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,1,128,0,1,float16,float16,127,1.0724373658498128
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,1,128,0,1,float16,fp8,127,0.9382932980855306
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,2,128,0,1,float16,float16,3,0.07239466905593872
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,2,128,0,1,float16,fp8,3,0.06604800124963124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,2,128,0,1,float16,float16,7,0.0717439999183019
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,2,128,0,1,float16,fp8,7,0.06606400012969971
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,2,128,0,1,float16,float16,15,0.07222400108973186
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,2,128,0,1,float16,fp8,15,0.06635199983914693
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,2,128,0,1,float16,float16,31,0.07226666808128357
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,2,128,0,1,float16,fp8,31,0.06509333352247874
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,2,128,0,1,float16,float16,63,0.07208000123500824
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,2,128,0,1,float16,fp8,63,0.06625066697597504
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,2,128,0,1,float16,float16,127,0.07210666437943776
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,2,128,0,1,float16,fp8,127,0.06814399858315785
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,4,128,0,1,float16,float16,1,0.10351999600728352
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,4,128,0,1,float16,fp8,1,0.09098133444786072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,4,128,0,1,float16,float16,3,0.10307733217875163
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,4,128,0,1,float16,fp8,3,0.09085333347320557
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,4,128,0,1,float16,float16,7,0.10289067029953003
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,4,128,0,1,float16,fp8,7,0.0906880001227061
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,4,128,0,1,float16,float16,15,0.1032373309135437
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,4,128,0,1,float16,fp8,15,0.09123200178146362
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,4,128,0,1,float16,float16,31,0.10288533568382263
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,4,128,0,1,float16,fp8,31,0.09061866998672485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,4,128,0,1,float16,float16,63,0.10339200496673584
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,4,128,0,1,float16,fp8,63,0.09121599793434143
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,4,128,0,1,float16,float16,127,0.10294933120409648
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,4,128,0,1,float16,fp8,127,0.09216533104578654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,8,128,0,1,float16,float16,1,0.17824000120162964
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,8,128,0,1,float16,fp8,1,0.1535360018412272
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,8,128,0,1,float16,float16,3,0.1786080002784729
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,8,128,0,1,float16,fp8,3,0.15404799580574036
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,8,128,0,1,float16,float16,7,0.17880533138910928
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,8,128,0,1,float16,fp8,7,0.15366933743158975
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,8,128,0,1,float16,float16,15,0.17840532461802164
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,8,128,0,1,float16,fp8,15,0.15291200081507364
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,8,128,0,1,float16,float16,31,0.17814399798711142
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,float16,1,0.012906666845083237
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,8,128,0,1,float16,fp8,31,0.15362667044003805
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,fp8,1,0.014864000181357065
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,float16,3,0.012858666479587555
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,fp8,3,0.014943999548753103
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,8,128,0,1,float16,fp8,63,0.15428800384203592
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,8,128,0,1,float16,float16,63,0.17875200510025024
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,float16,7,0.012869333227475485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,fp8,7,0.014837333311637243
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,float16,15,0.012890666723251343
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,8,128,0,1,float16,float16,127,0.17713600397109985
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,fp8,15,0.01533866673707962
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,32,8,128,0,1,float16,fp8,127,0.15407466888427734
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,float16,31,0.014629332969586054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,fp8,31,0.019381333142518997
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,float16,63,0.01309866706530253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,fp8,63,0.019173332800467808
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,float16,127,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,fp8,127,0.019253333409627277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,float16,255,0.020853333175182343
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,fp8,255,0.023007998863856
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,float16,511,0.031504000226656594
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,fp8,511,0.03330666571855545
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,float16,1023,0.05197866757710775
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,fp8,1023,0.05199466645717621
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,float16,2047,0.09403733412424724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,fp8,2047,0.09106133381525676
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,float16,4095,0.17884800831476846
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,fp8,4095,0.16726932922999063
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,float16,8191,0.34692267576853436
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,fp8,8191,0.3200160066286723
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,float16,1,0.01108266661564509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,float16,16383,0.7024266719818115
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,fp8,1,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,fp8,16383,0.6239466667175293
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,fp8,7,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,float16,3,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,fp8,3,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,float16,7,0.010954666882753372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,fp8,31,0.010954666882753372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,float16,15,0.011173332730929056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,fp8,15,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,float16,31,0.011061333119869232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,fp8,32767,1.4940427144368489
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,float16,63,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,fp8,63,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,float16,511,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,float16,127,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,fp8,127,0.011152000476916632
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,float16,255,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,1,128,0,1,float16,float16,32767,2.4689332644144693
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,fp8,2047,0.015546667079130808
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,float16,4095,0.017242666333913803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,fp8,255,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,fp8,511,0.012442667037248611
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,float16,1023,0.01180800050497055
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,fp8,1023,0.011226666470368704
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,float16,2047,0.015605332950750986
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,fp8,4095,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,float16,8191,0.022661333282788593
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,fp8,8191,0.020970667401949566
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,float16,16383,0.039690665900707245
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,fp8,16383,0.02717333287000656
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,float16,1,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,fp8,1,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,fp8,32767,0.04391466577847799
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,float16,3,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,2,128,0,1,float16,float16,32767,0.060362666845321655
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,fp8,3,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,float16,7,0.01101333275437355
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,fp8,7,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,float16,15,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,fp8,15,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,float16,31,0.010026666646202406
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,fp8,31,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,float16,255,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,float16,63,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,fp8,63,0.010618666807810465
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,float16,127,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,fp8,127,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,fp8,255,0.009893333539366722
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,float16,511,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,fp8,511,0.011253333340088526
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,float16,1023,0.012789333860079447
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,fp8,1023,0.012634667257467905
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,float16,2047,0.017045332739750545
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,fp8,2047,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,float16,4095,0.021183999876181286
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,fp8,4095,0.0189280000825723
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,float16,8191,0.039503999054431915
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,fp8,8191,0.02518400053183238
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,float16,16383,0.059792002042134605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,fp8,1,0.010992000500361124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,fp8,16383,0.03937600056330363
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,float16,1,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,fp8,32767,0.060922667384147644
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,float16,3,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,float16,15,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,4,128,0,1,float16,float16,32767,0.1014400025208791
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,fp8,3,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,float16,7,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,float16,63,0.011055999745925268
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,fp8,7,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,fp8,15,0.01089599976936976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,float16,31,0.010954666882753372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,fp8,31,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,float16,511,0.012773333738247553
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,fp8,63,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,float16,127,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,fp8,1023,0.01350933313369751
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,fp8,127,0.011173332730929056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,float16,255,0.010645333677530289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,fp8,255,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,fp8,511,0.011018666128317514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,float16,1023,0.012906666845083237
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,float16,2047,0.01922133316596349
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,fp8,2047,0.01762666677435239
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,float16,4095,0.03799466788768768
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,fp8,4095,0.02404800057411194
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,float16,8191,0.05907199780146281
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,fp8,8191,0.03774933268626531
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,float16,16383,0.10127466917037964
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,fp8,16383,0.05926933387915293
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,float16,1,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,float16,1,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,float16,32767,0.1834239959716797
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,fp8,3,0.01090666651725769
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,fp8,1,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,32,8,128,0,1,float16,fp8,32767,0.1011199951171875
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,fp8,1,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,float16,3,0.010944000134865442
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,float16,3,0.010901333143313726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,fp8,3,0.01108266661564509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,float16,7,0.009599999835093817
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,fp8,15,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,float16,7,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,fp8,7,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,fp8,7,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,float16,15,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,float16,15,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,fp8,15,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,float16,31,0.009989333028594652
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,float16,31,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,fp8,31,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,fp8,31,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,float16,63,0.010170666500926018
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,float16,63,0.010981333752473196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,fp8,63,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,fp8,63,0.011018666128317514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,float16,127,0.009712000067035357
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,fp8,255,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,float16,127,0.010928000013033548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,fp8,127,0.01089599976936976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,fp8,127,0.010650667051474253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,fp8,511,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,float16,255,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,float16,255,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,float16,1023,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,fp8,1023,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,fp8,255,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,float16,511,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,float16,511,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,fp8,511,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,float16,1023,0.010602666685978571
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,float16,4095,0.0129120002190272
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,fp8,1023,0.011152000476916632
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,float16,2047,0.012821332861979803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,float16,2047,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,fp8,2047,0.012847999731699625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,fp8,2047,0.015034666905800501
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,float16,4095,0.017082666357358296
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,fp8,4095,0.012944000462690989
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,fp8,4095,0.016762666404247284
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,float16,8191,0.012837332983811697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,float16,8191,0.019317333896954853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,fp8,8191,0.012896000097195307
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,fp8,8191,0.019178666174411774
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,float16,16383,0.012800000607967377
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,float16,16383,0.027104000250498455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,fp8,16383,0.012815999488035837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,fp8,16383,0.02515733242034912
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,float16,1,0.009365333244204521
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,float16,32767,0.01312000056107839
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,float16,1,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,float16,32767,0.04560533165931702
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,fp8,1,0.010949333508809408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,fp8,1,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,64,128,1,float16,fp8,32767,0.013050666699806849
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,64,0,1,float16,fp8,32767,0.036362667878468834
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,float16,3,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,float16,3,0.009103999783595404
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,fp8,3,0.010650667051474253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,fp8,3,0.010640000303586325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,float16,7,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,float16,7,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,fp8,7,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,fp8,7,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,float16,15,0.009018666421373686
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,float16,15,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,fp8,15,0.010954666882753372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,fp8,31,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,fp8,15,0.010901333143313726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,float16,31,0.008863999818762144
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,float16,31,0.009365333244204521
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,fp8,31,0.010650667051474253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,float16,127,0.008885333314538002
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,fp8,127,0.010954666882753372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,float16,127,0.010608000059922537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,float16,255,0.009626666704813639
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,float16,255,0.010501333822806677
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,float16,63,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,float16,63,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,fp8,63,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,fp8,63,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,fp8,127,0.011136000355084738
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,fp8,255,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,fp8,255,0.010058666889866194
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,float16,511,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,float16,511,0.010922666639089584
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,fp8,511,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,fp8,511,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,float16,1023,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,float16,1023,0.012773333738247553
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,fp8,1023,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,fp8,1023,0.012874666601419449
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,float16,2047,0.012885333349307379
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,float16,2047,0.018426666657129925
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,fp8,2047,0.012789333860079447
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,fp8,2047,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,float16,8191,0.01292266696691513
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,float16,4095,0.012858666479587555
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,float16,4095,0.019237333287795384
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,fp8,4095,0.012794667234023413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,fp8,4095,0.01897066707412402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,float16,8191,0.02515733242034912
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,fp8,8191,0.012879999975363413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,fp8,16383,0.013104000439246496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,fp8,8191,0.023071999351183575
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,float16,16383,0.012879999975363413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,float16,16383,0.03990933299064636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,fp8,16383,0.03147733211517334
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,float16,32767,0.013066666821638743
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,float16,1,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,float16,1,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,fp8,1,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,64,128,1,float16,fp8,32767,0.012773333738247553
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,float16,32767,0.06408533453941345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,fp8,1,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,64,0,1,float16,fp8,32767,0.05477866530418396
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,float16,3,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,float16,3,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,fp8,3,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,fp8,3,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,float16,7,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,float16,7,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,fp8,7,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,fp8,7,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,float16,15,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,float16,15,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,fp8,15,0.010911999891201654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,fp8,15,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,float16,31,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,float16,31,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,fp8,31,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,fp8,31,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,float16,127,0.010944000134865442
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,float16,63,0.010591999938090643
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,float16,63,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,fp8,63,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,fp8,63,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,float16,127,0.010890666395425797
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,fp8,127,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,fp8,127,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,float16,255,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,float16,255,0.010954666882753372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,fp8,255,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,fp8,255,0.010629333555698395
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,float16,511,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,float16,1023,0.013962666193644205
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,float16,511,0.011077333241701126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,fp8,511,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,fp8,511,0.011136000355084738
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,float16,1023,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,fp8,1023,0.011061333119869232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,fp8,1023,0.012741333494583765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,float16,2047,0.013002666334311167
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,float16,2047,0.017130666722853977
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,fp8,2047,0.0129120002190272
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,fp8,2047,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,float16,4095,0.013034666577974955
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,float16,4095,0.023306667804718018
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,float16,8191,0.039408000806967415
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,fp8,4095,0.013082666943470636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,fp8,4095,0.023002666731675465
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,float16,8191,0.012954667210578918
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,fp8,8191,0.013173333058754602
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,fp8,8191,0.031557333966096245
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,float16,16383,0.013130666067202887
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,float16,16383,0.061978667974472046
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,fp8,16383,0.013082666943470636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,fp8,16383,0.05400000015894572
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,float16,1,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,float16,1,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,float16,32767,0.013093333691358566
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,fp8,1,0.011221333096424738
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,64,128,1,float16,fp8,32767,0.013056000073750814
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,float16,32767,0.10602666934331258
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,fp8,1,0.012821332861979803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,float16,3,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,64,0,1,float16,fp8,32767,0.09252267082532246
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,fp8,3,0.011098666737476984
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,fp8,7,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,float16,3,0.011823999385039011
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,fp8,3,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,float16,7,0.010981333752473196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,float16,7,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,fp8,7,0.01292266696691513
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,float16,15,0.011727999895811081
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,float16,15,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,fp8,15,0.01116266722480456
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,float16,63,0.011711999773979187
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,fp8,15,0.012847999731699625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,float16,31,0.010954666882753372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,fp8,63,0.011813333878914515
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,float16,31,0.012432000289360682
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,fp8,31,0.01146666705608368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,fp8,31,0.012773333738247553
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,fp8,127,0.012800000607967377
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,float16,63,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,fp8,63,0.011077333241701126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,float16,127,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,float16,127,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,float16,511,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,fp8,127,0.011994666109482447
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,fp8,511,0.010928000013033548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,float16,255,0.011482667177915573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,float16,255,0.011781333635250727
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,fp8,255,0.012960000584522883
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,fp8,1023,0.012874666601419449
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,fp8,255,0.012085333466529846
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,float16,511,0.013167999684810638
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,fp8,511,0.012869333227475485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,fp8,2047,0.014757333944241205
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,float16,1023,0.010949333508809408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,float16,1023,0.01590399940808614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,float16,4095,0.014783999572197596
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,fp8,1023,0.015498666713635126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,float16,2047,0.014858666807413101
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,float16,2047,0.025205334027608235
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,fp8,2047,0.02334933231274287
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,float16,4095,0.03979733337958654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,fp8,4095,0.014864000181357065
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,fp8,4095,0.03327466547489166
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,float16,8191,0.014874666929244995
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,float16,16383,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,float16,8191,0.06330133477846782
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,fp8,8191,0.014815999815861383
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,fp8,8191,0.05579199890295664
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,float16,16383,0.10708266496658325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,fp8,16383,0.014858666807413101
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,fp8,16383,0.09291199843088786
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,float16,1,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,float16,32767,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,float16,1,0.009050666665037474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,fp8,1,0.009653333574533463
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,float16,32767,0.19715199867884317
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,64,128,1,float16,fp8,32767,0.014837333311637243
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,fp8,1,0.010575999816258749
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,64,0,1,float16,fp8,32767,0.16877333323160806
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,float16,3,0.00901333304742972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,float16,3,0.010656000425418219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,fp8,3,0.009109333157539368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,fp8,3,0.00997866690158844
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,float16,7,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,float16,7,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,fp8,7,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,fp8,7,0.009093333035707474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,float16,15,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,float16,15,0.008890666688481966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,fp8,15,0.01009599988659223
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,fp8,15,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,float16,31,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,float16,31,0.010357333347201347
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,fp8,31,0.009098666409651438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,fp8,31,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,float16,63,0.009098666409651438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,float16,63,0.00898133342464765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,fp8,63,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,fp8,63,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,float16,127,0.009770666559537252
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,float16,127,0.009189333145817121
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,fp8,255,0.010650667051474253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,fp8,127,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,fp8,127,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,float16,255,0.008943999807039896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,float16,255,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,fp8,255,0.010597333312034607
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,float16,511,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,float16,511,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,fp8,511,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,fp8,511,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,float16,1023,0.010485333700974783
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,float16,1023,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,fp8,1023,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,fp8,1023,0.011120000233252844
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,float16,2047,0.01090666651725769
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,float16,2047,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,fp8,2047,0.009679999823371569
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,fp8,2047,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,float16,4095,0.01089599976936976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,float16,4095,0.011087999989589056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,fp8,4095,0.008933333059151968
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,fp8,4095,0.011328000575304031
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,float16,8191,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,float16,16383,0.019317333896954853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,float16,8191,0.014938666174809137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,fp8,8191,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,fp8,8191,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,float16,16383,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,fp8,16383,0.008997333546479544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,fp8,16383,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,float16,32767,0.01089599976936976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,float16,32767,0.027221334477265675
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,fp8,32767,0.008896000062425932
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,fp8,32767,0.02720533311367035
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,float16,65535,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,float16,65535,0.02943466603755951
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,float16,1,0.00916800027092298
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,fp8,65535,0.029765332738558452
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,fp8,65535,0.009061333412925402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,float16,1,0.008954666554927826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,fp8,1,0.01032533310353756
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,fp8,1,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,float16,3,0.009029333169261614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,float16,3,0.008837333569924036
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,fp8,3,0.010373333469033241
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,fp8,3,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,float16,7,0.008890666688481966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,float16,131071,0.03352533280849457
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,float16,7,0.009061333412925402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,float16,131071,0.01322666679819425
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,fp8,7,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,64,128,1,float16,fp8,131071,0.010933333386977514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,fp8,7,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,float16,15,0.009130666653315226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,64,0,1,float16,fp8,131071,0.03346666693687439
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,float16,15,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,fp8,15,0.00895999992887179
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,fp8,15,0.01090666651725769
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,float16,31,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,float16,31,0.009050666665037474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,fp8,63,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,fp8,31,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,fp8,31,0.010656000425418219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,float16,63,0.00984533317387104
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,float16,63,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,fp8,63,0.009962666779756546
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,float16,127,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,float16,127,0.008757333581646284
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,fp8,127,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,fp8,127,0.01007466639081637
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,float16,255,0.008922666932145754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,float16,255,0.009679999823371569
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,fp8,255,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,fp8,255,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,float16,511,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,float16,511,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,fp8,511,0.010928000013033548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,fp8,511,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,float16,1023,0.008752000207702318
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,fp8,2047,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,float16,1023,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,fp8,1023,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,fp8,1023,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,float16,4095,0.010928000013033548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,fp8,4095,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,float16,2047,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,float16,2047,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,fp8,2047,0.010575999816258749
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,float16,4095,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,fp8,4095,0.010992000500361124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,float16,8191,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,float16,8191,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,fp8,8191,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,fp8,16383,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,fp8,8191,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,float16,16383,0.00892800030608972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,float16,16383,0.019029332945744198
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,fp8,16383,0.018976000448067982
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,float16,32767,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,float16,32767,0.02128000060717265
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,fp8,32767,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,fp8,32767,0.021189334491888683
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,float16,65535,0.008976000050703684
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,float16,65535,0.023178666830062866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,fp8,65535,0.009130666653315226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,fp8,65535,0.023018665611743927
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,float16,1,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,float16,1,0.008821333448092142
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,fp8,1,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,fp8,1,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,float16,131071,0.012266666938861212
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,float16,3,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,float16,3,0.009695999945203463
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,fp8,3,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,fp8,3,0.010090666512648264
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,float16,7,0.008863999818762144
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,float16,131071,0.02752000093460083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,float16,7,0.009237333511312803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,64,128,1,float16,fp8,131071,0.011178666104873022
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,fp8,7,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,fp8,7,0.010069333637754122
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,64,0,1,float16,fp8,131071,0.02672533442576726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,float16,15,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,float16,15,0.010650667051474253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,fp8,15,0.010128000130256018
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,fp8,15,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,float16,31,0.009008000294367472
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,float16,31,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,fp8,31,0.009957333405812582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,fp8,31,0.009136000027259191
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,float16,63,0.009119999905427298
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,float16,63,0.00890666681031386
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,fp8,63,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,fp8,63,0.009509333098928133
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,float16,127,0.00898133342464765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,float16,127,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,fp8,127,0.010138666878143946
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,fp8,127,0.00895999992887179
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,float16,255,0.008912000184257826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,float16,255,0.008912000184257826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,fp8,255,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,fp8,255,0.008853333070874214
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,float16,511,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,float16,511,0.01101333275437355
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,fp8,511,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,fp8,511,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,float16,1023,0.00877333308259646
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,float16,1023,0.008965333302815756
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,fp8,1023,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,float16,4095,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,fp8,1023,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,float16,2047,0.009061333412925402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,float16,2047,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,fp8,2047,0.011018666128317514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,fp8,2047,0.010602666685978571
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,float16,4095,0.01090666651725769
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,fp8,4095,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,fp8,4095,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,float16,16383,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,float16,16383,0.017130666722853977
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,float16,8191,0.009072000160813332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,float16,8191,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,fp8,8191,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,fp8,8191,0.017045332739750545
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,fp8,16383,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,fp8,16383,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,float16,32767,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,float16,32767,0.018863999595244724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,fp8,32767,0.009072000160813332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,fp8,32767,0.017914666483799618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,float16,65535,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,float16,65535,0.02218666672706604
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,fp8,65535,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,float16,1,0.009162666896979014
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,fp8,65535,0.02117866774400075
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,float16,1,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,fp8,1,0.009701333319147428
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,fp8,1,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,float16,3,0.008901333436369896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,float16,3,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,fp8,3,0.010181333248813948
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,fp8,131071,0.012586666891972223
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,fp8,3,0.009061333412925402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,float16,7,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,64,128,1,float16,float16,131071,0.012709333250919977
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,float16,7,0.00927466650803884
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,float16,131071,0.027535999814669292
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,float16,15,0.009946666657924652
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,fp8,7,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,fp8,7,0.009152000149091085
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,float16,15,0.01062400018175443
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,64,0,1,float16,fp8,131071,0.025418666501839954
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,fp8,15,0.0100426667680343
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,fp8,15,0.009434666484594345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,float16,63,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,float16,31,0.0099093330403169
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,float16,31,0.010453333457310995
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,fp8,31,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,fp8,31,0.009717333440979322
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,float16,63,0.008853333070874214
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,fp8,63,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,fp8,63,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,float16,127,0.010106666634480158
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,float16,127,0.009018666421373686
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,fp8,127,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,fp8,127,0.009717333440979322
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,float16,255,0.00916800027092298
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,float16,255,0.010640000303586325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,fp8,255,0.010634666929642359
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,fp8,255,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,float16,511,0.009109333157539368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,float16,511,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,fp8,511,0.0100426667680343
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,fp8,511,0.011071999867757162
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,float16,1023,0.010640000303586325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,float16,1023,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,fp8,2047,0.01098666712641716
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,fp8,1023,0.008912000184257826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,float16,4095,0.013104000439246496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,fp8,1023,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,float16,2047,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,float16,2047,0.010981333752473196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,fp8,2047,0.010949333508809408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,float16,4095,0.010656000425418219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,fp8,4095,0.01101333275437355
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,fp8,4095,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,float16,8191,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,float16,8191,0.01481066644191742
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,fp8,8191,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,fp8,8191,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,float16,16383,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,float16,32767,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,float16,16383,0.01718933383623759
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,fp8,16383,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,fp8,16383,0.015002666662136713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,float16,32767,0.01907733331123988
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,fp8,32767,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,fp8,32767,0.01899733394384384
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,float16,65535,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,float16,65535,0.025333332518736523
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,fp8,65535,0.023376000424226124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,fp8,65535,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,float16,1,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,float16,1,0.00890666681031386
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,fp8,1,0.011125333607196808
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,fp8,1,0.010901333143313726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,float16,3,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,float16,3,0.008890666688481966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,fp8,3,0.01101333275437355
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,fp8,3,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,float16,131071,0.04138133426507314
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,float16,131071,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,float16,7,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,float16,7,0.008874666566650072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,64,0,1,float16,fp8,131071,0.033520000676314034
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,64,128,1,float16,fp8,131071,0.012917333592971167
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,fp8,7,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,float16,31,0.009632000078757605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,fp8,7,0.01002133327225844
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,float16,15,0.008885333314538002
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,float16,15,0.008992000172535578
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,fp8,15,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,fp8,15,0.016800000021855038
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,float16,31,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,fp8,31,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,fp8,31,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,float16,127,0.009183999771873156
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,float16,63,0.009029333169261614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,float16,63,0.00903466654320558
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,fp8,63,0.008949333180983862
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,fp8,63,0.01108266661564509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,float16,127,0.008890666688481966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,fp8,255,0.009029333169261614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,fp8,127,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,fp8,127,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,float16,255,0.009029333169261614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,float16,255,0.009098666409651438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,fp8,255,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,float16,511,0.010490667074918747
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,float16,511,0.010602666685978571
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,fp8,511,0.008992000172535578
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,fp8,1023,0.011109333485364914
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,fp8,511,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,float16,2047,0.01097600037852923
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,float16,1023,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,float16,1023,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,float16,4095,0.011018666128317514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,fp8,1023,0.009423999736706415
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,fp8,4095,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,float16,2047,0.010650667051474253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,fp8,2047,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,fp8,2047,0.011098666737476984
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,float16,4095,0.012746666868527731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,fp8,4095,0.012725333372751871
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,float16,8191,0.011087999989589056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,float16,16383,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,float16,8191,0.015253332753976187
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,fp8,8191,0.01108266661564509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,fp8,8191,0.01657066618402799
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,float16,16383,0.020970667401949566
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,fp8,16383,0.010970667004585266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,fp8,16383,0.02082666630546252
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,float16,32767,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,float16,32767,0.022783999641736347
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,fp8,32767,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,fp8,32767,0.0230880007147789
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,float16,65535,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,float16,65535,0.0232640008131663
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,fp8,65535,0.010938666760921478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,fp8,65535,0.0234400009115537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,float16,1,0.00898133342464765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,float16,1,0.010629333555698395
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,fp8,1,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,fp8,1,0.00903466654320558
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,float16,3,0.008933333059151968
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,float16,3,0.010362666721145311
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,fp8,3,0.008965333302815756
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,fp8,3,0.008858666444818178
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,float16,7,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,float16,7,0.009285333255926767
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,fp8,7,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,fp8,7,0.00897066667675972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,fp8,131071,0.02757866680622101
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,float16,15,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,float16,15,0.009248000259200731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,float16,131071,0.012826666235923767
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,64,0,1,float16,float16,131071,0.02942933390537898
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,fp8,15,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,64,128,1,float16,fp8,131071,0.013130666067202887
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,fp8,15,0.008954666554927826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,float16,31,0.009061333412925402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,float16,31,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,fp8,31,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,fp8,31,0.009008000294367472
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,float16,63,0.009072000160813332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,float16,63,0.010106666634480158
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,fp8,63,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,fp8,63,0.009018666421373686
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,float16,127,0.010586666564146677
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,float16,127,0.01003200002014637
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,fp8,127,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,fp8,127,0.010911999891201654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,float16,255,0.008821333448092142
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,float16,255,0.010101333260536194
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,fp8,255,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,fp8,255,0.009050666665037474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,float16,511,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,float16,511,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,fp8,511,0.010640000303586325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,fp8,511,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,float16,1023,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,float16,1023,0.010885333021481832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,fp8,1023,0.010885333021481832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,fp8,1023,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,float16,4095,0.012773333738247553
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,float16,2047,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,float16,2047,0.010922666639089584
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,fp8,2047,0.010944000134865442
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,fp8,2047,0.011114666859308878
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,float16,4095,0.010944000134865442
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,fp8,4095,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,fp8,4095,0.011125333607196808
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,float16,16383,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,float16,8191,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,float16,8191,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,fp8,8191,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,fp8,8191,0.016821333517630894
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,float16,16383,0.01703466723362605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,fp8,16383,0.011018666128317514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,fp8,16383,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,float16,32767,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,float16,32767,0.019141333798567455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,fp8,32767,0.011120000233252844
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,fp8,32767,0.01926400015751521
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,float16,65535,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,float16,65535,0.02128000060717265
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,float16,1,0.009018666421373686
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,fp8,65535,0.01090666651725769
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,float16,1,0.010629333555698395
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,fp8,65535,0.021253332495689392
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,fp8,1,0.009413333609700203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,fp8,1,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,float16,3,0.010522666076819101
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,float16,3,0.00892800030608972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,float16,131071,0.012826666235923767
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,fp8,3,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,fp8,3,0.01089599976936976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,float16,7,0.009103999783595404
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,64,128,1,float16,fp8,131071,0.0129120002190272
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,float16,7,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,float16,131071,0.029472000896930695
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,fp8,7,0.010634666929642359
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,fp8,7,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,float16,15,0.009018666421373686
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,64,0,1,float16,fp8,131071,0.027450665831565857
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,float16,15,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,fp8,15,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,fp8,31,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,fp8,15,0.009061333412925402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,float16,31,0.010629333555698395
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,float16,31,0.010634666929642359
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,fp8,31,0.008890666688481966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,float16,63,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,float16,63,0.009066666786869368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,fp8,63,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,fp8,63,0.008949333180983862
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,float16,127,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,float16,127,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,fp8,127,0.009397333487868309
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,fp8,127,0.009999999776482582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,float16,255,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,float16,255,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,fp8,255,0.009717333440979322
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,fp8,255,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,float16,511,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,float16,511,0.009925333162148794
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,fp8,511,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,fp8,511,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,float16,1023,0.009109333157539368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,float16,1023,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,fp8,1023,0.009381333366036415
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,fp8,1023,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,float16,2047,0.010885333021481832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,float16,2047,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,fp8,2047,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,fp8,2047,0.011120000233252844
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,float16,4095,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,float16,4095,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,fp8,4095,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,fp8,4095,0.013072000195582708
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,float16,8191,0.01097600037852923
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,float16,8191,0.01551466683546702
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,fp8,8191,0.010992000500361124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,fp8,8191,0.015285332997639975
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,float16,16383,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,float16,16383,0.017498667041460674
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,fp8,16383,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,fp8,16383,0.01681600014368693
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,float16,32767,0.010634666929642359
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,float16,32767,0.02096533278624217
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,fp8,32767,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,fp8,32767,0.01899733394384384
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,float16,65535,0.02537599951028824
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,float16,65535,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,fp8,65535,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,float16,1,0.008853333070874214
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,float16,1,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,fp8,65535,0.023141334454218548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,fp8,1,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,fp8,1,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,float16,3,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,float16,3,0.00897066667675972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,fp8,3,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,fp8,3,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,fp8,131071,0.012784000486135483
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,float16,7,0.008805333326260248
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,float16,7,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,float16,15,0.009098666409651438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,64,128,1,float16,float16,131071,0.012879999975363413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,fp8,15,0.008954666554927826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,fp8,15,0.00898133342464765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,fp8,7,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,fp8,7,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,float16,131071,0.04330133398373922
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,fp8,31,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,float16,63,0.009050666665037474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,float16,15,0.009061333412925402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,64,0,1,float16,fp8,131071,0.03365866591533025
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,float16,31,0.008816000074148178
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,float16,31,0.008922666932145754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,fp8,31,0.009018666421373686
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,float16,63,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,fp8,63,0.009093333035707474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,fp8,63,0.010581333190202713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,float16,127,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,float16,127,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,fp8,127,0.009541333342591921
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,fp8,127,0.0106133334338665
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,float16,255,0.009066666786869368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,fp8,511,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,float16,255,0.010490667074918747
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,fp8,255,0.009359999870260557
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,fp8,255,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,float16,511,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,float16,511,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,fp8,511,0.01091733326514562
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,float16,1023,0.008949333180983862
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,float16,1023,0.010911999891201654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,fp8,1023,0.009429333110650381
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,fp8,1023,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,float16,2047,0.011098666737476984
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,float16,2047,0.012954667210578918
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,fp8,2047,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,fp8,4095,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,fp8,2047,0.013167999684810638
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,float16,4095,0.01090666651725769
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,float16,4095,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,fp8,4095,0.01089599976936976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,float16,8191,0.01108266661564509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,float16,8191,0.015413332730531693
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,fp8,8191,0.011136000355084738
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,fp8,8191,0.0161013330022494
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,float16,16383,0.010656000425418219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,float16,16383,0.01930133377512296
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,fp8,16383,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,fp8,16383,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,float16,32767,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,float16,32767,0.02510933329661687
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,fp8,32767,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,fp8,32767,0.021141332884629566
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,float16,65535,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,float16,65535,0.03948266555865606
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,float16,1,0.010933333386977514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,fp8,65535,0.011146667102972666
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,fp8,65535,0.03133333226044973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,float16,1,0.010970667004585266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,fp8,1,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,fp8,1,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,float16,3,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,float16,131071,0.012917333592971167
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,float16,3,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,fp8,3,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,fp8,3,0.010965333630641302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,float16,7,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,float16,7,0.011066666493813196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,fp8,7,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,fp8,131071,0.053717335065205894
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,fp8,7,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,64,0,1,float16,float16,131071,0.06177600224812826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,float16,15,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,64,128,1,float16,fp8,131071,0.012752000242471695
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,float16,15,0.010885333021481832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,fp8,15,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,fp8,15,0.011098666737476984
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,float16,31,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,float16,31,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,fp8,31,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,fp8,31,0.010618666807810465
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,float16,63,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,float16,63,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,fp8,63,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,float16,255,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,fp8,63,0.011098666737476984
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,float16,127,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,float16,127,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,fp8,127,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,fp8,127,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,float16,255,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,fp8,255,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,fp8,255,0.01101333275437355
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,float16,511,0.01239466667175293
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,float16,511,0.011312000453472137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,fp8,511,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,fp8,511,0.011055999745925268
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,float16,1023,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,float16,1023,0.012970666090647379
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,fp8,1023,0.010922666639089584
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,fp8,1023,0.012773333738247553
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,float16,4095,0.01314666618903478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,float16,2047,0.012810666114091873
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,float16,2047,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,fp8,4095,0.019167999426523846
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,fp8,2047,0.012815999488035837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,fp8,2047,0.017093333105246227
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,float16,8191,0.02717866748571396
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,fp8,8191,0.012917333592971167
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,float16,4095,0.01932266727089882
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,fp8,4095,0.012938667088747025
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,float16,8191,0.012821332861979803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,float16,1,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,fp8,8191,0.025253333151340485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,float16,16383,0.012847999731699625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,float16,1,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,float16,3,0.01062400018175443
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,float16,16383,0.04557333389918009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,64,128,1,float16,fp8,16383,0.01310933381319046
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,fp8,1,0.011061333119869232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,64,0,1,float16,fp8,16383,0.03713600089152654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,fp8,1,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,float16,3,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,fp8,3,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,fp8,7,0.011109333485364914
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,fp8,3,0.01091733326514562
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,float16,15,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,float16,7,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,float16,7,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,fp8,7,0.011098666737476984
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,float16,15,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,fp8,15,0.010928000013033548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,fp8,15,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,float16,31,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,float16,31,0.010965333630641302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,fp8,31,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,fp8,31,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,float16,63,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,float16,63,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,fp8,127,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,fp8,63,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,fp8,63,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,float16,127,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,fp8,255,0.010650667051474253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,float16,127,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,fp8,127,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,float16,255,0.010645333677530289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,float16,255,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,fp8,255,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,float16,511,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,float16,511,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,fp8,1023,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,fp8,511,0.011125333607196808
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,fp8,511,0.010890666395425797
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,float16,1023,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,float16,1023,0.012826666235923767
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,fp8,1023,0.012917333592971167
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,float16,2047,0.012975999464591345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,float16,2047,0.018933333456516266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,fp8,2047,0.013045333325862885
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,fp8,2047,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,float16,4095,0.012810666114091873
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,float16,4095,0.02534399926662445
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,fp8,4095,0.012944000462690989
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,fp8,4095,0.022085333863894146
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,float16,8191,0.013056000073750814
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,float16,8191,0.03935466706752777
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,fp8,8191,0.012847999731699625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,fp8,8191,0.032058666149775185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,float16,1,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,fp8,16383,0.05426133175690969
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,float16,16383,0.012794667234023413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,float16,3,0.011120000233252844
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,float16,1,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,64,128,1,float16,fp8,16383,0.013418667018413544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,64,0,1,float16,float16,16383,0.0620000014702479
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,fp8,1,0.01121066634853681
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,fp8,1,0.012133333832025528
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,float16,3,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,fp8,3,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,fp8,3,0.011018666128317514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,float16,7,0.011328000575304031
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,float16,7,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,fp8,7,0.01109333336353302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,float16,31,0.01145600030819575
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,fp8,7,0.011178666104873022
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,float16,15,0.011525332927703857
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,float16,15,0.011653333902359009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,float16,63,0.010970667004585266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,float16,63,0.011407999942700068
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,fp8,15,0.011178666104873022
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,fp8,15,0.011120000233252844
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,float16,31,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,fp8,31,0.011071999867757162
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,fp8,31,0.01126933346192042
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,fp8,127,0.01110400011142095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,fp8,63,0.01180800050497055
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,fp8,63,0.010885333021481832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,float16,127,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,float16,127,0.011221333096424738
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,float16,511,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,fp8,127,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,float16,255,0.01246400053302447
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,float16,255,0.011674666156371435
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,fp8,255,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,float16,1023,0.017130666722853977
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,fp8,1023,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,fp8,255,0.011546666423479715
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,float16,511,0.012917333592971167
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,fp8,511,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,fp8,511,0.012837332983811697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,float16,1023,0.010992000500361124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,fp8,1023,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,float16,2047,0.014874666929244995
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,float16,2047,0.025360000630219776
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,fp8,2047,0.014826666563749313
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,fp8,2047,0.022997332115968067
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,float16,4095,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,float16,4095,0.03977066775163015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,fp8,4095,0.01479999969402949
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,fp8,4095,0.03283733377854029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,float16,8191,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,float16,8191,0.062080000837643944
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,fp8,8191,0.015050667027632395
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,fp8,8191,0.055445333321889244
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,float16,16383,0.014815999815861383
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,float16,16383,0.10745599865913391
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,float16,1,0.013050666699806849
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,64,128,1,float16,fp8,16383,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,float16,1,0.012949333836634954
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,fp8,1,0.013978666315476099
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,64,0,1,float16,fp8,16383,0.0932373305161794
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,fp8,1,0.015034666905800501
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,float16,3,0.014874666929244995
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,float16,3,0.013093333691358566
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,fp8,3,0.013552000125249227
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,fp8,3,0.012970666090647379
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,float16,7,0.014965333044528961
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,float16,15,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,float16,7,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,fp8,7,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,fp8,7,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,float16,15,0.013173333058754602
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,fp8,15,0.01313599944114685
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,fp8,15,0.013264000415802002
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,float16,31,0.014175999909639359
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,float16,31,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,fp8,31,0.014864000181357065
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,fp8,63,0.013093333691358566
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,fp8,31,0.014885333677132925
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,float16,63,0.013264000415802002
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,float16,63,0.012906666845083237
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,fp8,127,0.01479999969402949
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,fp8,63,0.013717333475748697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,float16,127,0.014767999450365702
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,float16,127,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,fp8,127,0.013050666699806849
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,float16,255,0.013232000172138214
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,float16,511,0.01725333308180173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,float16,255,0.012901333471139273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,fp8,255,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,fp8,255,0.013503999759753546
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,float16,511,0.013034666577974955
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,fp8,511,0.017952000101407368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,fp8,511,0.013013333082199097
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,float16,2047,0.04009066770474116
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,float16,1023,0.013007999708255133
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,fp8,2047,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,float16,1023,0.023007998863856
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,fp8,1023,0.013061333447694778
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,fp8,1023,0.021615999440352123
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,float16,2047,0.017194667210181553
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,fp8,2047,0.03323200096686681
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,float16,4095,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,float16,4095,0.062234664956728615
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,fp8,4095,0.017136000096797943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,fp8,4095,0.0558186670144399
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,float16,8191,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,float16,8191,0.10734400153160095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,float16,16383,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,fp8,8191,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,fp8,8191,0.09464533130327861
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,float16,1,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,float16,1,0.010117333382368088
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,fp8,1,0.010634666929642359
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,64,128,1,float16,fp8,16383,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,float16,16383,0.19742933909098306
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,fp8,1,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,float16,7,0.008986666798591614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,64,0,1,float16,fp8,16383,0.16952000061670938
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,fp8,7,0.00972800018886725
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,float16,3,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,float16,3,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,fp8,3,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,fp8,3,0.010629333555698395
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,float16,7,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,fp8,7,0.009306666751702627
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,float16,15,0.009754666437705358
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,fp8,31,0.011018666128317514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,float16,15,0.009039999917149544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,float16,63,0.010122666756312052
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,fp8,15,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,fp8,15,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,float16,31,0.00973866693675518
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,float16,31,0.009450666606426239
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,fp8,31,0.009162666896979014
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,float16,63,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,fp8,63,0.00972800018886725
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,fp8,63,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,float16,127,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,float16,127,0.009183999771873156
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,fp8,127,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,fp8,127,0.011866666376590729
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,float16,255,0.009098666409651438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,float16,255,0.008986666798591614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,fp8,255,0.00933333362142245
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,fp8,255,0.012015999605258306
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,float16,511,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,float16,511,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,fp8,511,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,float16,2047,0.012752000242471695
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,float16,2047,0.012901333471139273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,fp8,511,0.012762666990359625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,float16,1023,0.008949333180983862
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,float16,1023,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,fp8,1023,0.00938666673998038
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,fp8,1023,0.01109333336353302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,fp8,2047,0.012842666357755661
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,fp8,2047,0.012874666601419449
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,float16,8191,0.013088000317414602
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,float16,4095,0.012885333349307379
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,float16,4095,0.01310933381319046
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,fp8,4095,0.010922666639089584
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,fp8,4095,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,float16,8191,0.017344000438849132
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,fp8,8191,0.011941333611806234
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,fp8,8191,0.018895999838908512
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,float16,16383,0.012810666114091873
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,float16,16383,0.019215999792019527
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,fp8,16383,0.011066666493813196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,fp8,16383,0.01899733394384384
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,float16,32767,0.021407999098300934
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,float16,32767,0.012821332861979803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,fp8,32767,0.012746666868527731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,fp8,32767,0.021253332495689392
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,float16,65535,0.01209066684047381
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,float16,65535,0.024693332612514496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,fp8,65535,0.023157333334287006
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,fp8,65535,0.01109333336353302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,float16,1,0.010656000425418219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,float16,1,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,fp8,1,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,fp8,1,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,float16,3,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,float16,3,0.010597333312034607
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,fp8,3,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,fp8,3,0.010933333386977514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,float16,7,0.009098666409651438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,float16,7,0.010458666831254959
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,float16,131071,0.014794666320085526
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,fp8,7,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,float16,131071,0.03329599897066752
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,fp8,7,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,float16,15,0.008992000172535578
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,64,128,1,float16,fp8,131071,0.013909333695967993
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,float16,15,0.010634666929642359
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,fp8,15,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,fp8,15,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,64,0,1,float16,fp8,131071,0.030960001051425934
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,float16,31,0.009093333035707474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,float16,31,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,fp8,31,0.008938666433095932
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,fp8,31,0.009701333319147428
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,float16,63,0.010069333637754122
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,float16,63,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,fp8,63,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,float16,127,0.008976000050703684
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,fp8,63,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,float16,127,0.010981333752473196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,fp8,127,0.008874666566650072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,fp8,127,0.00955200009047985
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,float16,255,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,float16,255,0.00933333362142245
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,fp8,255,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,fp8,255,0.01033599985142549
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,float16,1023,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,float16,511,0.010634666929642359
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,float16,511,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,float16,2047,0.01110400011142095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,fp8,511,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,fp8,511,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,float16,1023,0.010575999816258749
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,fp8,1023,0.010933333386977514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,fp8,1023,0.010981333752473196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,fp8,4095,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,float16,2047,0.012015999605258306
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,fp8,2047,0.01108266661564509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,fp8,2047,0.01268799975514412
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,float16,4095,0.012853333105643591
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,float16,4095,0.015562667200962702
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,fp8,4095,0.01523200049996376
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,float16,16383,0.012805332740147909
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,float16,8191,0.012965332716703415
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,float16,8191,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,fp8,8191,0.010970667004585266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,fp8,8191,0.01703466723362605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,float16,16383,0.01926400015751521
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,fp8,16383,0.012810666114091873
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,fp8,16383,0.01703466723362605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,float16,32767,0.012831999609867731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,float16,32767,0.021722666919231415
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,fp8,32767,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,fp8,32767,0.01926933353145917
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,float16,65535,0.01097600037852923
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,fp8,65535,0.012970666090647379
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,float16,1,0.009114666531483332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,float16,65535,0.02739733209212621
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,float16,1,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,fp8,65535,0.02502399931351344
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,fp8,1,0.00914666677514712
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,fp8,1,0.009338666374484697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,float16,3,0.010645333677530289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,float16,3,0.009066666786869368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,fp8,3,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,float16,131071,0.0441599984963735
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,fp8,3,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,float16,7,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,float16,7,0.009077333534757296
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,float16,131071,0.014864000181357065
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,fp8,7,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,fp8,7,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,64,128,1,float16,fp8,131071,0.012869333227475485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,float16,15,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,64,0,1,float16,fp8,131071,0.03545066714286804
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,float16,15,0.009829333052039146
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,fp8,15,0.010645333677530289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,fp8,15,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,float16,31,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,float16,31,0.009039999917149544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,fp8,31,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,fp8,31,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,float16,63,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,float16,63,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,fp8,63,0.01097600037852923
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,fp8,127,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,fp8,63,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,float16,127,0.008863999818762144
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,float16,127,0.010901333143313726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,fp8,127,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,float16,255,0.010944000134865442
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,float16,255,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,fp8,255,0.011178666104873022
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,fp8,255,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,float16,511,0.009050666665037474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,float16,511,0.010981333752473196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,fp8,511,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,fp8,1023,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,fp8,511,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,float16,1023,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,float16,1023,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,fp8,1023,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,float16,2047,0.01116266722480456
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,fp8,4095,0.012901333471139273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,float16,2047,0.014725333700577417
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,fp8,2047,0.012863999853531519
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,fp8,2047,0.014767999450365702
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,float16,4095,0.012842666357755661
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,float16,4095,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,fp8,4095,0.014938666174809137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,float16,8191,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,float16,8191,0.01736533393462499
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,fp8,8191,0.011242666592200598
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,fp8,8191,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,float16,16383,0.012650666137536367
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,float16,16383,0.021045332153638203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,fp8,16383,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,fp8,16383,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,float16,32767,0.011018666128317514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,fp8,32767,0.023376000424226124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,float16,32767,0.025493333737055462
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,fp8,32767,0.01128000020980835
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,fp8,65535,0.012778667112191519
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,float16,65535,0.012794667234023413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,float16,65535,0.039936001102129616
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,fp8,65535,0.031445334355036415
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,float16,1,0.010608000059922537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,float16,1,0.009050666665037474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,fp8,1,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,fp8,1,0.010453333457310995
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,float16,3,0.010586666564146677
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,fp8,3,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,float16,3,0.009429333110650381
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,fp8,3,0.010656000425418219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,float16,7,0.010543999572594961
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,float16,7,0.009450666606426239
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,fp8,131071,0.013173333058754602
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,64,128,1,float16,float16,131071,0.01303999995191892
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,float16,15,0.00871999996403853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,fp8,7,0.00997866690158844
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,fp8,7,0.010954666882753372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,float16,15,0.0106133334338665
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,float16,131071,0.06338666876157124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,fp8,15,0.009045333291093508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,64,0,1,float16,fp8,131071,0.05407999952634176
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,float16,31,0.009130666653315226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,fp8,15,0.00949866697192192
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,float16,31,0.00898133342464765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,fp8,31,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,fp8,31,0.010650667051474253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,float16,63,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,float16,63,0.009066666786869368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,fp8,63,0.01009599988659223
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,fp8,63,0.010885333021481832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,float16,127,0.009029333169261614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,float16,127,0.009359999870260557
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,fp8,127,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,fp8,127,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,float16,255,0.008976000050703684
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,float16,255,0.008943999807039896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,fp8,255,0.010346666599313417
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,fp8,255,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,float16,511,0.00919999989370505
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,float16,511,0.011055999745925268
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,fp8,511,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,fp8,511,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,float16,1023,0.008986666798591614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,float16,1023,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,fp8,1023,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,fp8,1023,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,float16,2047,0.012853333105643591
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,float16,2047,0.014015999933083853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,fp8,2047,0.011152000476916632
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,fp8,2047,0.01312000056107839
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,float16,8191,0.013034666577974955
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,float16,4095,0.012746666868527731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,float16,4095,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,fp8,4095,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,fp8,4095,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,float16,8191,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,float16,16383,0.025055999557177227
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,fp8,16383,0.012714666624863943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,fp8,8191,0.01108266661564509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,fp8,8191,0.017279999951521557
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,float16,16383,0.012858666479587555
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,fp8,16383,0.022991999983787537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,float16,32767,0.012741333494583765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,float16,32767,0.03965333352486292
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,fp8,32767,0.012879999975363413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,fp8,32767,0.03148799886306127
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,float16,65535,0.012917333592971167
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,float16,65535,0.06368533273537953
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,fp8,65535,0.013050666699806849
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,fp8,65535,0.05390933156013489
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,float16,1,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,float16,1,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,fp8,1,0.011061333119869232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,fp8,1,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,float16,3,0.01108266661564509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,float16,3,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,fp8,3,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,fp8,3,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,fp8,131071,0.09291199843088786
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,float16,7,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,float16,7,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,64,0,1,float16,float16,131071,0.11213866869608562
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,fp8,7,0.010650667051474253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,float16,15,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,fp8,7,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,float16,131071,0.014762666076421738
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,float16,15,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,fp8,15,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,64,128,1,float16,fp8,131071,0.013104000439246496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,fp8,31,0.010970667004585266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,float16,63,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,fp8,15,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,float16,31,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,float16,31,0.010992000500361124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,fp8,31,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,fp8,127,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,float16,63,0.011141333729028702
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,fp8,63,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,float16,255,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,fp8,63,0.010954666882753372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,fp8,255,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,float16,127,0.01089599976936976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,float16,127,0.011071999867757162
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,float16,511,0.01209066684047381
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,fp8,127,0.011098666737476984
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,float16,255,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,fp8,255,0.010981333752473196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,float16,511,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,fp8,511,0.010933333386977514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,fp8,511,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,float16,2047,0.013141332815090815
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,float16,2047,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,float16,1023,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,float16,1023,0.013007999708255133
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,fp8,2047,0.019088000059127808
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,fp8,1023,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,fp8,1023,0.013157332936922709
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,fp8,2047,0.01331199953953425
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,float16,4095,0.013776000589132309
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,float16,4095,0.026677332818508148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,fp8,4095,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,fp8,4095,0.024101334313551586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,float16,8191,0.013066666821638743
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,float16,1,0.01110400011142095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,float16,8191,0.043578664461771645
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,64,128,1,float16,fp8,8191,0.014149333039919535
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,float16,3,0.011461333682139715
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,float16,3,0.011071999867757162
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,fp8,3,0.012853333105643591
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,float16,1,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,float16,7,0.011242666592200598
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,64,0,1,float16,fp8,8191,0.03615466753641764
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,fp8,1,0.011136000355084738
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,fp8,1,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,fp8,3,0.012586666891972223
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,float16,7,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,fp8,7,0.012714666624863943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,fp8,7,0.011018666128317514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,float16,15,0.011328000575304031
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,float16,15,0.0124746672809124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,fp8,15,0.011136000355084738
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,fp8,15,0.011071999867757162
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,float16,31,0.012693333129088083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,float16,31,0.011823999385039011
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,fp8,63,0.011061333119869232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,fp8,31,0.011306667079528173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,fp8,31,0.011722666521867117
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,float16,127,0.01110400011142095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,float16,63,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,float16,63,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,float16,255,0.011152000476916632
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,fp8,63,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,float16,127,0.011685332904259363
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,fp8,127,0.011157333850860596
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,fp8,127,0.01209066684047381
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,float16,255,0.01108266661564509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,fp8,255,0.011087999989589056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,fp8,255,0.011125333607196808
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,float16,511,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,float16,511,0.012981332838535309
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,fp8,511,0.011717333147923151
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,fp8,511,0.012858666479587555
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,float16,1023,0.012485332787036896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,float16,1023,0.01659199967980385
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,fp8,1023,0.011557333171367645
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,fp8,1023,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,float16,2047,0.014778666198253632
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,float16,2047,0.02510933329661687
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,fp8,2047,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,fp8,2047,0.023498666783173878
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,float16,4095,0.014864000181357065
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,float16,4095,0.041493333876132965
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,fp8,4095,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,fp8,4095,0.034416000048319496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,float16,8191,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,float16,8191,0.06260266900062561
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,float16,1,0.014698666830857595
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,64,128,1,float16,fp8,8191,0.014943999548753103
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,float16,1,0.013487999637921652
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,fp8,1,0.01310933381319046
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,fp8,1,0.01310933381319046
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,64,0,1,float16,fp8,8191,0.05514133473237356
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,float16,7,0.01322666679819425
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,float16,3,0.014725333700577417
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,float16,3,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,fp8,3,0.014789332946141561
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,float16,15,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,fp8,3,0.012906666845083237
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,float16,7,0.013194666554530462
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,fp8,7,0.013141332815090815
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,fp8,7,0.012869333227475485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,float16,15,0.01341333364446958
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,fp8,15,0.012837332983811697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,float16,63,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,float16,63,0.013104000439246496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,fp8,15,0.014645333091417948
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,float16,31,0.015002666662136713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,float16,127,0.013061333447694778
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,float16,31,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,fp8,31,0.01312000056107839
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,fp8,31,0.01379199946920077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,fp8,63,0.01302933320403099
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,fp8,63,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,fp8,255,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,float16,127,0.013050666699806849
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,fp8,127,0.01312000056107839
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,float16,511,0.01704000060757001
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,fp8,127,0.013173333058754602
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,float16,255,0.012896000097195307
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,float16,255,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,fp8,255,0.01303999995191892
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,float16,511,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,fp8,511,0.013066666821638743
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,fp8,511,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,float16,1023,0.01479999969402949
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,float16,1023,0.022965334355831146
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,fp8,1023,0.01328533391157786
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,fp8,2047,0.033546666304270424
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,fp8,1023,0.021381333470344543
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,float16,2047,0.01681600014368693
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,float16,2047,0.04146133363246918
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,fp8,2047,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,float16,4095,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,float16,4095,0.06401066482067108
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,fp8,4095,0.017008000363906223
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,fp8,4095,0.05566933254400889
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,float16,8191,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,float16,8191,0.1071626643339793
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,float16,1,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,64,128,1,float16,fp8,8191,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,float16,1,0.018906666586796444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,float16,3,0.01727466657757759
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,64,0,1,float16,fp8,8191,0.09418666362762451
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,fp8,1,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,fp8,1,0.017237332959969837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,float16,3,0.02014933278163274
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,fp8,3,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,fp8,3,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,float16,7,0.018826667219400406
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,float16,7,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,fp8,7,0.01708799973130226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,fp8,7,0.017130666722853977
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,float16,15,0.0173333336909612
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,float16,31,0.018858666221300762
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,float16,15,0.01894933357834816
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,fp8,15,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,float16,63,0.018858666221300762
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,fp8,15,0.017290666699409485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,float16,31,0.017125333348910015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,fp8,31,0.01685333376129468
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,fp8,31,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,float16,63,0.018858666221300762
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,fp8,63,0.01709866647919019
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,fp8,63,0.017194667210181553
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,float16,127,0.018874666343132656
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,float16,127,0.018181333939234417
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,fp8,127,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,fp8,127,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,float16,255,0.01886933296918869
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,float16,511,0.02346133440732956
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,float16,255,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,fp8,255,0.017397332936525345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,fp8,255,0.01706133286158244
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,float16,511,0.01903466631968816
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,float16,1023,0.03629866739114126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,fp8,511,0.018613333503405254
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,fp8,511,0.023039999107519787
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,float16,1023,0.01912533367673556
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,fp8,1023,0.0179626668492953
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,fp8,1023,0.031184000273545582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,float16,2047,0.021045332153638203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,float16,2047,0.059631998340288796
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,fp8,2047,0.021066665649414062
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,fp8,2047,0.053583999474843345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,float16,4095,0.02143999934196472
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,float16,4095,0.09878933429718018
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,fp8,4095,0.020986666282018025
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,fp8,4095,0.08668266733487447
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,float16,8191,0.021002667645613354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,64,128,1,float16,fp8,8191,0.021002667645613354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,64,128,1,float16,float16,1,0.012858666479587555
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,float16,8191,0.17698132991790771
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,64,0,1,float16,float16,1,0.012709333250919977
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,64,128,1,float16,fp8,1,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,64,0,1,float16,fp8,1,0.012815999488035837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,64,0,1,float16,fp8,8191,0.15306666493415833
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,64,128,1,float16,float16,3,0.01239466667175293
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,64,0,1,float16,float16,3,0.01313599944114685
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,64,128,1,float16,fp8,3,0.012879999975363413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,64,0,1,float16,fp8,3,0.012810666114091873
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,64,128,1,float16,float16,7,0.012448000411192576
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,64,0,1,float16,float16,7,0.012874666601419449
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,64,128,1,float16,fp8,7,0.012853333105643591
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,64,128,1,float16,float16,15,0.012821332861979803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,64,0,1,float16,fp8,7,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,64,0,1,float16,float16,15,0.012863999853531519
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,64,128,1,float16,fp8,15,0.01251199965675672
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,64,0,1,float16,fp8,15,0.012752000242471695
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,64,128,1,float16,float16,31,0.012378666549921036
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,64,0,1,float16,float16,31,0.012725333372751871
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,64,128,1,float16,fp8,31,0.012682666381200155
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,64,0,1,float16,fp8,31,0.012810666114091873
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,64,0,1,float16,float16,63,0.013173333058754602
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,64,128,1,float16,float16,63,0.013056000073750814
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,64,128,1,float16,fp8,63,0.012234666695197424
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,64,0,1,float16,fp8,63,0.012741333494583765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,64,128,1,float16,float16,127,0.012661332885424295
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,64,0,1,float16,float16,127,0.012757333616415659
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,64,128,1,float16,fp8,127,0.012821332861979803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,64,0,1,float16,fp8,127,0.012960000584522883
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,64,128,1,float16,float16,255,0.01313599944114685
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,64,128,1,float16,fp8,255,0.012901333471139273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,64,0,1,float16,float16,255,0.013199999928474426
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,64,0,1,float16,fp8,255,0.013130666067202887
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,64,128,1,float16,float16,511,0.012810666114091873
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,64,0,1,float16,float16,511,0.014826666563749313
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,64,128,1,float16,fp8,511,0.012730666746695837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,64,0,1,float16,fp8,511,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,64,128,1,float16,float16,1023,0.012981332838535309
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,64,0,1,float16,float16,1023,0.018629333625237148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,64,0,1,float16,float16,2047,0.027434666951497395
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,64,0,1,float16,fp8,1023,0.017173333714405697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,64,128,1,float16,fp8,1023,0.013290667285521826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,64,128,1,float16,float16,2047,0.015247999380032221
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,64,128,1,float16,fp8,2047,0.015018666783968607
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,64,128,1,float16,float16,1,0.013898666948080063
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,64,0,1,float16,fp8,2047,0.025242666403452556
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,64,0,1,float16,float16,3,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,64,0,1,float16,float16,1,0.015008000036080679
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,64,0,1,float16,fp8,3,0.013503999759753546
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,64,128,1,float16,fp8,1,0.013354666531085968
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,64,0,1,float16,fp8,1,0.014858666807413101
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,64,128,1,float16,float16,3,0.013424000392357508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,64,128,1,float16,fp8,3,0.014991999914248785
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,64,128,1,float16,float16,7,0.014757333944241205
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,64,0,1,float16,float16,7,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,64,128,1,float16,fp8,7,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,64,0,1,float16,fp8,7,0.01351999988158544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,64,128,1,float16,float16,15,0.01310933381319046
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,64,0,1,float16,float16,15,0.014874666929244995
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,64,128,1,float16,fp8,15,0.013157332936922709
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,64,0,1,float16,fp8,15,0.014746667196353277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,64,0,1,float16,fp8,31,0.014858666807413101
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,64,128,1,float16,float16,31,0.014943999548753103
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,64,0,1,float16,float16,31,0.014677333335081736
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,64,128,1,float16,fp8,63,0.01470400020480156
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,64,0,1,float16,fp8,63,0.014767999450365702
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,64,128,1,float16,fp8,31,0.014762666076421738
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,64,0,1,float16,float16,127,0.013114667187134424
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,64,128,1,float16,float16,63,0.012975999464591345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,64,0,1,float16,float16,63,0.01369599997997284
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,64,128,1,float16,float16,127,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,64,128,1,float16,fp8,127,0.014650666465361914
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,64,0,1,float16,fp8,127,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,64,128,1,float16,float16,255,0.014762666076421738
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,64,0,1,float16,float16,255,0.012991999586423239
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,64,128,1,float16,fp8,255,0.01358933374285698
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,64,0,1,float16,fp8,255,0.014981333166360855
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,64,128,1,float16,float16,511,0.013946666071812311
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,64,0,1,float16,float16,511,0.017194667210181553
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,64,128,1,float16,fp8,511,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,64,128,1,float16,fp8,1023,0.014874666929244995
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,64,0,1,float16,fp8,511,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,64,128,1,float16,float16,1023,0.013967999567588171
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,64,0,1,float16,float16,1023,0.023333333432674408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,64,0,1,float16,fp8,1023,0.021322667598724365
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,64,128,1,float16,fp8,2047,0.017269333203633625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,64,128,1,float16,float16,2047,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,64,0,1,float16,float16,1,0.017173333714405697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,64,0,1,float16,float16,2047,0.040821333726247154
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,64,128,1,float16,float16,1,0.017130666722853977
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,64,0,1,float16,fp8,2047,0.03391999999682108
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,64,128,1,float16,fp8,1,0.01724799970785777
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,64,0,1,float16,fp8,1,0.017429333180189133
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,64,128,1,float16,float16,3,0.01727466657757759
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,64,0,1,float16,float16,3,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,64,128,1,float16,fp8,7,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,64,128,1,float16,fp8,3,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,64,0,1,float16,fp8,3,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,64,128,1,float16,float16,7,0.01714666684468587
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,64,0,1,float16,float16,7,0.018485333770513535
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,64,0,1,float16,fp8,7,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,64,128,1,float16,float16,15,0.017301333447297413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,64,0,1,float16,float16,15,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,64,128,1,float16,fp8,15,0.017322666943073273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,64,0,1,float16,fp8,15,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,64,128,1,float16,float16,31,0.01730666682124138
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,64,0,1,float16,float16,63,0.01724799970785777
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,64,0,1,float16,float16,31,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,64,128,1,float16,fp8,31,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,64,0,1,float16,fp8,31,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,64,128,1,float16,float16,63,0.01710933322707812
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,64,128,1,float16,fp8,63,0.017237332959969837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,64,0,1,float16,fp8,63,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,64,128,1,float16,float16,127,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,64,0,1,float16,float16,127,0.017162666966517765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,64,128,1,float16,fp8,127,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,64,0,1,float16,fp8,127,0.017488000293572743
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,64,128,1,float16,float16,255,0.018250666558742523
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,64,0,1,float16,float16,511,0.02386666586001714
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,64,0,1,float16,float16,255,0.01716800034046173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,64,128,1,float16,fp8,255,0.01911466692884763
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,64,0,1,float16,fp8,255,0.017279999951521557
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,64,128,1,float16,float16,511,0.018272000054518383
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,64,128,1,float16,fp8,511,0.01703466723362605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,64,0,1,float16,fp8,511,0.021733333667119343
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,64,128,1,float16,float16,1023,0.018021332720915478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,64,0,1,float16,float16,1023,0.03738133360942205
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,64,0,1,float16,float16,2047,0.05986666679382324
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,64,128,1,float16,fp8,2047,0.02092266579469045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,64,128,1,float16,fp8,1023,0.018160000443458557
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,64,0,1,float16,float16,1,0.027093333502610523
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,64,0,1,float16,fp8,1023,0.031258667508761086
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,64,128,1,float16,float16,2047,0.02126399924357732
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,64,128,1,float16,float16,1,0.027093333502610523
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,64,0,1,float16,fp8,2047,0.05384533107280731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,64,128,1,float16,fp8,1,0.025205334027608235
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,64,0,1,float16,fp8,1,0.02534399926662445
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,64,0,1,float16,float16,3,0.027301333844661713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,64,128,1,float16,float16,3,0.02735999971628189
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,64,128,1,float16,fp8,3,0.02518933266401291
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,64,0,1,float16,fp8,3,0.02532800038655599
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,64,128,1,float16,float16,7,0.027093333502610523
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,64,0,1,float16,float16,7,0.02677333354949951
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,64,128,1,float16,fp8,7,0.02513066679239273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,64,0,1,float16,fp8,7,0.025392000873883564
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,64,128,1,float16,float16,15,0.026778665681680042
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,64,0,1,float16,float16,15,0.02703999976317088
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,64,128,1,float16,fp8,31,0.025439999997615814
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,64,128,1,float16,fp8,15,0.02515733242034912
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,64,0,1,float16,fp8,15,0.025333332518736523
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,64,128,1,float16,float16,31,0.02733866622050603
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,64,0,1,float16,float16,31,0.026885333160559338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,64,0,1,float16,fp8,31,0.02513066679239273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,64,128,1,float16,float16,63,0.026170666019121807
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,64,0,1,float16,float16,63,0.02741866558790207
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,64,128,1,float16,fp8,63,0.025487999121348064
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,64,0,1,float16,fp8,63,0.02517866591612498
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,64,128,1,float16,float16,127,0.02720533311367035
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,64,0,1,float16,float16,127,0.027269333600997925
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,64,128,1,float16,fp8,127,0.025279998779296875
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,64,0,1,float16,fp8,127,0.025125332176685333
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,64,128,1,float16,float16,255,0.027162666122118633
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,64,0,1,float16,float16,255,0.026464000344276428
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,64,128,1,float16,fp8,255,0.025413334369659424
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,64,0,1,float16,fp8,255,0.025434667865435284
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,64,128,1,float16,float16,511,0.02717866748571396
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,64,0,1,float16,float16,511,0.03951466580231985
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,64,128,1,float16,fp8,511,0.026522666215896606
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,64,0,1,float16,fp8,511,0.03491200009981791
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,64,128,1,float16,float16,1023,0.027162666122118633
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,64,0,1,float16,float16,1023,0.05975466469923655
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,64,128,1,float16,fp8,1023,0.027130665878454845
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,64,0,1,float16,fp8,1023,0.05411200225353241
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,64,128,1,float16,float16,2047,0.029504001140594482
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,64,0,1,float16,float16,2047,0.10237333178520203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,64,128,1,float16,fp8,2047,0.029194665451844532
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,64,0,1,float16,fp8,2047,0.09054932991663615
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,64,128,1,float16,float16,1,0.014874666929244995
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,64,0,1,float16,float16,1,0.015626666446526844
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,64,128,1,float16,fp8,1,0.01543466622630755
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,64,0,1,float16,fp8,1,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,64,128,1,float16,fp8,3,0.014949332922697067
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,64,128,1,float16,float16,3,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,64,0,1,float16,float16,3,0.015919999529918034
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,64,0,1,float16,fp8,3,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,64,128,1,float16,float16,7,0.014885333677132925
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,64,0,1,float16,float16,7,0.01543466622630755
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,64,128,1,float16,fp8,7,0.014949332922697067
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,64,0,1,float16,fp8,7,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,64,128,1,float16,fp8,15,0.015919999529918034
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,64,128,1,float16,float16,15,0.015466666469971338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,64,0,1,float16,float16,15,0.015626666446526844
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,64,0,1,float16,float16,31,0.016634666671355564
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,64,0,1,float16,fp8,15,0.014858666807413101
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,64,0,1,float16,fp8,31,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,64,128,1,float16,float16,63,0.015978666643301647
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,64,128,1,float16,float16,31,0.015034666905800501
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,64,128,1,float16,fp8,31,0.014773332824309668
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,64,0,1,float16,float16,63,0.015402667224407196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,64,128,1,float16,fp8,63,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,64,0,1,float16,fp8,63,0.015429332852363586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,64,128,1,float16,float16,127,0.01594666639963786
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,64,0,1,float16,float16,127,0.014938666174809137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,64,0,1,float16,float16,255,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,64,128,1,float16,fp8,127,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,64,0,1,float16,fp8,127,0.015034666905800501
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,64,128,1,float16,float16,255,0.015018666783968607
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,64,128,1,float16,fp8,255,0.015360000232855478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,64,0,1,float16,fp8,255,0.01525866612792015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,64,128,1,float16,float16,511,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,64,0,1,float16,float16,511,0.01903466631968816
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,64,128,1,float16,fp8,511,0.015824000040690105
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,64,0,1,float16,fp8,511,0.019066666563351948
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,64,128,1,float16,float16,1023,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,64,0,1,float16,float16,1023,0.026362667481104534
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,64,128,1,float16,fp8,1023,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,64,0,1,float16,fp8,1023,0.02515200028816859
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,64,128,1,float16,float16,1,0.019167999426523846
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,64,0,1,float16,float16,1,0.01905599981546402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,64,128,1,float16,fp8,1,0.01844800015290578
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,64,0,1,float16,fp8,1,0.01882133384545644
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,64,128,1,float16,float16,3,0.01886933296918869
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,64,0,1,float16,float16,3,0.01911466692884763
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,64,128,1,float16,fp8,3,0.018874666343132656
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,64,0,1,float16,fp8,3,0.017685333887736004
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,64,128,1,float16,float16,7,0.019152000546455383
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,64,0,1,float16,float16,7,0.019013332823912304
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,64,128,1,float16,fp8,7,0.018986667195955913
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,64,0,1,float16,fp8,7,0.018933333456516266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,64,128,1,float16,float16,15,0.018906666586796444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,64,0,1,float16,float16,15,0.018885333091020584
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,64,128,1,float16,fp8,15,0.01883200059334437
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,64,0,1,float16,fp8,15,0.017194667210181553
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,64,128,1,float16,float16,31,0.01893866683046023
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,64,0,1,float16,float16,31,0.018976000448067982
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,64,128,1,float16,fp8,31,0.018922666708628338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,64,0,1,float16,fp8,31,0.018954666952292126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,64,128,1,float16,float16,63,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,64,0,1,float16,float16,63,0.019199999670187633
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,64,0,1,float16,float16,127,0.018986667195955913
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,64,128,1,float16,fp8,63,0.017279999951521557
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,64,0,1,float16,fp8,127,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,64,0,1,float16,fp8,63,0.01725333308180173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,64,128,1,float16,float16,127,0.01923199991385142
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,64,128,1,float16,fp8,127,0.018837332725524902
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,64,128,1,float16,float16,255,0.019029332945744198
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,64,0,1,float16,float16,255,0.018874666343132656
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,64,128,1,float16,fp8,255,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,64,0,1,float16,fp8,255,0.017237332959969837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,64,128,1,float16,float16,511,0.01884799947341283
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,64,0,1,float16,float16,511,0.02421333392461141
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,64,128,1,float16,float16,1023,0.018917333334684372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,64,128,1,float16,fp8,511,0.018805333723624546
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,64,128,1,float16,fp8,1023,0.019088000059127808
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,64,0,1,float16,fp8,511,0.022954667607943218
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,64,0,1,float16,float16,1023,0.036090667049090065
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,64,128,1,float16,float16,1,0.0271573339899381
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,64,0,1,float16,fp8,1023,0.0313226655125618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,64,0,1,float16,float16,1,0.027119999130566914
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,64,128,1,float16,fp8,1,0.025093334416548412
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,64,0,1,float16,fp8,1,0.025077333052953083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,64,128,1,float16,float16,3,0.027130665878454845
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,64,0,1,float16,float16,3,0.027087998886903126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,64,128,1,float16,float16,7,0.02749866743882497
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,64,128,1,float16,fp8,3,0.025077333052953083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,64,0,1,float16,fp8,3,0.02534399926662445
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,64,0,1,float16,float16,7,0.027285332481066387
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,64,128,1,float16,fp8,7,0.025173333783944447
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,64,0,1,float16,fp8,7,0.02514133354028066
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,64,128,1,float16,float16,15,0.02714666724205017
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,64,0,1,float16,float16,15,0.02733866622050603
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,64,128,1,float16,fp8,15,0.02537599951028824
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,64,0,1,float16,fp8,15,0.025749333202838898
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,64,128,1,float16,float16,31,0.02703999976317088
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,64,0,1,float16,float16,31,0.027237333357334137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,64,128,1,float16,fp8,31,0.025098666548728943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,64,0,1,float16,fp8,31,0.025077333052953083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,64,128,1,float16,float16,63,0.027424000203609467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,64,0,1,float16,float16,63,0.027077332139015198
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,64,128,1,float16,fp8,63,0.025418666501839954
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,64,0,1,float16,fp8,63,0.02510400116443634
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,64,128,1,float16,float16,127,0.027061333258946735
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,64,0,1,float16,float16,127,0.0272533322374026
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,64,128,1,float16,fp8,127,0.02548266698916753
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,64,0,1,float16,float16,255,0.02716800073782603
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,64,0,1,float16,fp8,127,0.02513599892457326
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,64,128,1,float16,float16,255,0.02731200059254964
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,64,128,1,float16,fp8,255,0.026122666895389557
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,64,0,1,float16,fp8,255,0.025285333395004272
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,64,128,1,float16,float16,511,0.027280000348885853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,64,0,1,float16,float16,511,0.039674667020638786
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,64,128,1,float16,float16,1023,0.027477333943049114
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,64,128,1,float16,fp8,511,0.02720533311367035
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,64,0,1,float16,fp8,511,0.035402665535608925
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,64,0,1,float16,float16,1023,0.060047999024391174
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,64,128,1,float16,fp8,1023,0.02624000112215678
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,64,128,1,float16,float16,1,0.043951998154322304
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,64,0,1,float16,fp8,1023,0.054042667150497437
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,64,0,1,float16,float16,1,0.04354133208592733
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,64,0,1,float16,float16,3,0.0436106671889623
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,64,128,1,float16,fp8,1,0.04144000013669332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,64,0,1,float16,fp8,1,0.040752001106739044
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,64,128,1,float16,float16,3,0.043824002146720886
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,64,128,1,float16,fp8,3,0.04067733387152354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,64,0,1,float16,fp8,3,0.04147200038035711
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,64,0,1,float16,fp8,7,0.041519999504089355
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,64,128,1,float16,float16,7,0.04381333291530609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,64,0,1,float16,float16,7,0.043525333205858864
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,64,128,1,float16,fp8,7,0.04142399877309799
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,64,128,1,float16,float16,15,0.043562665581703186
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,64,0,1,float16,float16,15,0.043925335009892784
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,64,128,1,float16,fp8,15,0.04029866556326548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,64,0,1,float16,fp8,15,0.041536000867684685
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,64,128,1,float16,float16,31,0.0436160018046697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,64,0,1,float16,float16,31,0.043893332282702126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,64,128,1,float16,fp8,31,0.04164800047874451
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,64,0,1,float16,fp8,31,0.04141866664091746
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,64,128,1,float16,float16,63,0.04382933179537455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,64,0,1,float16,float16,63,0.043866669138272606
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,64,0,1,float16,float16,127,0.04378666480382284
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,64,128,1,float16,fp8,63,0.04147200038035711
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,64,0,1,float16,fp8,63,0.041450666884581246
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,64,128,1,float16,float16,127,0.04407466451327006
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,64,128,1,float16,fp8,127,0.041482667128245033
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,64,0,1,float16,fp8,127,0.04145599901676178
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,64,0,1,float16,fp8,255,0.041477332512537636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,64,128,1,float16,float16,255,0.04554133117198944
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,64,0,1,float16,float16,255,0.04558399816354116
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,64,128,1,float16,fp8,255,0.04257066547870636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,64,128,1,float16,float16,511,0.04582933088143667
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,64,0,1,float16,float16,511,0.06809600194295247
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,64,128,1,float16,fp8,511,0.04186133543650309
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,64,0,1,float16,fp8,511,0.061861331264177956
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,64,0,1,float16,float16,1023,0.10705600182215373
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,64,128,1,float16,float16,1023,0.04580800235271454
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,float16,1,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,64,128,1,float16,fp8,1023,0.042037333051363625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,float16,1,0.008986666798591614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,fp8,1,0.011066666493813196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,64,0,1,float16,fp8,1023,0.09541866183280945
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,fp8,1,0.010954666882753372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,float16,3,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,fp8,7,0.010629333555698395
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,fp8,7,0.009648000200589498
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,float16,3,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,fp8,3,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,fp8,3,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,float16,7,0.009808000177145004
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,float16,7,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,float16,15,0.009072000160813332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,float16,15,0.010239999741315842
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,fp8,15,0.009648000200589498
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,fp8,15,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,float16,31,0.009808000177145004
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,float16,31,0.010128000130256018
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,fp8,31,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,fp8,31,0.010570666442314783
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,float16,63,0.009114666531483332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,float16,63,0.010378666842977205
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,fp8,63,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,fp8,63,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,float16,127,0.009733333562811216
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,float16,127,0.009685333197315535
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,fp8,127,0.010885333021481832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,fp8,127,0.01097600037852923
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,float16,255,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,float16,255,0.009066666786869368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,fp8,255,0.010522666076819101
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,fp8,255,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,float16,511,0.010378666842977205
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,float16,511,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,fp8,511,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,fp8,1023,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,fp8,511,0.010938666760921478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,float16,1023,0.009194666519761086
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,float16,1023,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,fp8,1023,0.01089599976936976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,float16,2047,0.012805332740147909
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,float16,2047,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,fp8,2047,0.012821332861979803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,fp8,2047,0.012997332960367203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,float16,4095,0.010944000134865442
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,float16,4095,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,fp8,4095,0.011514666179815928
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,fp8,4095,0.016586666305859882
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,float16,8191,0.012757333616415659
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,float16,8191,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,fp8,8191,0.012869333227475485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,fp8,8191,0.01729600007335345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,float16,16383,0.012714666624863943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,float16,16383,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,fp8,16383,0.01293333371480306
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,fp8,16383,0.018901333212852478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,float16,32767,0.012778667112191519
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,float16,32767,0.02327999969323476
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,fp8,32767,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,fp8,32767,0.02294933299223582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,float16,65535,0.01116266722480456
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,float16,65535,0.02916266769170761
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,fp8,65535,0.013125333935022354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,fp8,65535,0.027258666853109997
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,float16,1,0.009077333534757296
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,float16,1,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,fp8,1,0.010357333347201347
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,fp8,1,0.00901333304742972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,float16,3,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,float16,3,0.009066666786869368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,fp8,3,0.009152000149091085
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,fp8,3,0.009109333157539368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,float16,7,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,float16,131071,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,float16,7,0.008943999807039896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,float16,131071,0.04797866443792979
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,fp8,7,0.009050666665037474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,64,0,1,float16,fp8,131071,0.03938666731119156
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,64,128,1,float16,fp8,131071,0.01481066644191742
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,fp8,7,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,float16,15,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,float16,15,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,fp8,15,0.0100853331387043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,fp8,15,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,float16,31,0.009029333169261614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,float16,31,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,fp8,63,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,fp8,31,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,fp8,31,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,float16,63,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,float16,63,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,fp8,63,0.00997866690158844
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,float16,127,0.009290666629870733
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,float16,127,0.010992000500361124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,fp8,127,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,fp8,127,0.010949333508809408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,float16,255,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,float16,255,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,fp8,255,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,fp8,255,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,float16,511,0.009077333534757296
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,float16,511,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,fp8,511,0.01090666651725769
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,fp8,511,0.011114666859308878
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,float16,2047,0.014965333044528961
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,float16,1023,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,float16,1023,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,float16,4095,0.011792000383138657
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,fp8,1023,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,fp8,1023,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,fp8,4095,0.015066667149464289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,float16,2047,0.01108266661564509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,fp8,2047,0.012746666868527731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,fp8,2047,0.01481066644191742
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,float16,4095,0.01613866661985715
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,fp8,4095,0.012853333105643591
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,float16,8191,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,float16,8191,0.01728533332546552
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,fp8,8191,0.012762666990359625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,fp8,8191,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,float16,16383,0.012821332861979803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,float16,16383,0.020917333662509918
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,float16,32767,0.012885333349307379
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,fp8,16383,0.012858666479587555
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,fp8,16383,0.01924266666173935
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,float16,32767,0.025392000873883564
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,fp8,32767,0.023365333676338196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,fp8,32767,0.013050666699806849
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,float16,65535,0.012586666891972223
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,float16,65535,0.03959999978542328
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,fp8,65535,0.03126933425664902
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,fp8,65535,0.012757333616415659
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,float16,1,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,float16,1,0.009109333157539368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,fp8,1,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,fp8,1,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,float16,3,0.009029333169261614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,float16,3,0.00915733352303505
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,fp8,3,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,fp8,3,0.00955200009047985
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,float16,7,0.00895999992887179
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,float16,7,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,fp8,131071,0.013162666310866674
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,64,128,1,float16,float16,131071,0.012928000340859095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,float16,15,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,float16,15,0.009029333169261614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,fp8,7,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,fp8,7,0.009130666653315226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,float16,131071,0.06225066880385081
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,64,0,1,float16,fp8,131071,0.05509866774082184
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,fp8,15,0.00996800015370051
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,fp8,15,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,float16,31,0.009797333429257074
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,float16,31,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,fp8,63,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,fp8,31,0.009359999870260557
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,fp8,31,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,float16,63,0.00897066667675972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,float16,63,0.009066666786869368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,fp8,63,0.010591999938090643
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,float16,127,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,float16,255,0.009061333412925402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,float16,127,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,fp8,127,0.008938666433095932
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,fp8,127,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,float16,255,0.008997333546479544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,fp8,511,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,fp8,255,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,fp8,255,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,float16,511,0.009706666693091393
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,fp8,511,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,float16,511,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,float16,1023,0.009994666402538618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,float16,1023,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,fp8,1023,0.009663999701539675
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,float16,4095,0.012730666746695837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,fp8,1023,0.011253333340088526
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,float16,2047,0.011781333635250727
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,float16,2047,0.013141332815090815
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,float16,8191,0.013162666310866674
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,fp8,2047,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,fp8,2047,0.014901333798964819
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,float16,4095,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,fp8,4095,0.012997332960367203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,fp8,4095,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,float16,8191,0.018640000373125076
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,fp8,8191,0.012831999609867731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,fp8,8191,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,float16,16383,0.01293333371480306
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,float16,16383,0.025087999800841015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,fp8,16383,0.012757333616415659
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,fp8,16383,0.022954667607943218
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,float16,32767,0.012826666235923767
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,float16,32767,0.0394400010506312
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,fp8,32767,0.012693333129088083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,fp8,32767,0.0315733328461647
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,float16,65535,0.012869333227475485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,float16,65535,0.06294399996598561
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,fp8,65535,0.012725333372751871
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,float16,1,0.00980266680320104
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,fp8,65535,0.05398933092753092
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,float16,1,0.009770666559537252
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,fp8,1,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,fp8,1,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,float16,3,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,float16,3,0.008863999818762144
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,fp8,3,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,fp8,3,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,float16,7,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,float16,7,0.009098666409651438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,float16,131071,0.015013333410024643
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,fp8,7,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,fp8,7,0.01033599985142549
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,float16,15,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,float16,131071,0.11150933305422465
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,64,128,1,float16,fp8,131071,0.014752000570297241
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,float16,15,0.009008000294367472
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,64,0,1,float16,fp8,131071,0.0937653382619222
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,fp8,15,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,float16,63,0.010112000008424124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,fp8,31,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,fp8,15,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,float16,31,0.008890666688481966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,float16,31,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,fp8,31,0.010480000327030817
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,float16,63,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,fp8,63,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,fp8,63,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,float16,127,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,float16,127,0.00903466654320558
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,fp8,127,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,fp8,127,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,float16,255,0.011114666859308878
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,float16,255,0.010597333312034607
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,fp8,255,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,fp8,255,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,float16,1023,0.010981333752473196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,float16,511,0.009690666571259499
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,float16,511,0.010928000013033548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,fp8,511,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,fp8,511,0.011183999478816986
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,float16,1023,0.012639999389648438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,fp8,2047,0.01251199965675672
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,fp8,1023,0.011071999867757162
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,fp8,1023,0.011146667102972666
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,float16,2047,0.01139733319481214
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,float16,2047,0.014997333288192749
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,fp8,2047,0.015285332997639975
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,float16,4095,0.012986666212479273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,float16,4095,0.018986667195955913
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,fp8,4095,0.012842666357755661
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,fp8,4095,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,float16,8191,0.011594666788975397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,float16,8191,0.023530667026837666
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,float16,16383,0.03975466638803482
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,fp8,8191,0.012746666868527731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,fp8,8191,0.023210667073726654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,float16,16383,0.012858666479587555
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,fp8,16383,0.012842666357755661
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,fp8,16383,0.031290667752424874
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,float16,32767,0.011749333391586939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,float16,32767,0.062261333068211876
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,fp8,32767,0.012949333836634954
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,fp8,32767,0.05380799869696299
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,float16,65535,0.012762666990359625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,float16,65535,0.10956799983978271
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,fp8,65535,0.012837332983811697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,fp8,65535,0.09136533737182617
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,64,128,1,float16,float16,1,0.021338666478792827
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,64,0,1,float16,float16,1,0.021290667355060577
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,64,128,1,float16,fp8,1,0.02128533273935318
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,float16,131071,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,64,0,1,float16,fp8,1,0.021045332153638203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,fp8,131071,0.16878400246302286
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,64,128,1,float16,float16,3,0.021018666525681812
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,64,0,1,float16,float16,3,0.021397332350413006
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,64,0,1,float16,float16,131071,0.20469866196314493
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,64,128,1,float16,fp8,131071,0.014864000181357065
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,64,128,1,float16,fp8,3,0.021104000508785248
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,64,128,1,float16,float16,7,0.02146666745344798
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,64,0,1,float16,fp8,3,0.021301334102948506
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,64,0,1,float16,float16,7,0.02123733361562093
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,64,128,1,float16,fp8,7,0.021013334393501282
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,64,0,1,float16,fp8,7,0.021040000021457672
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,64,128,1,float16,float16,15,0.02141333371400833
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,64,0,1,float16,float16,15,0.020970667401949566
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,64,128,1,float16,fp8,15,0.021018666525681812
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,64,0,1,float16,fp8,15,0.02107733239730199
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,64,128,1,float16,float16,31,0.021040000021457672
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,64,0,1,float16,float16,31,0.021274665991465252
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,64,128,1,float16,fp8,31,0.020879998803138733
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,64,0,1,float16,fp8,31,0.02128533273935318
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,64,128,1,float16,float16,63,0.02128533273935318
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,64,0,1,float16,float16,63,0.021045332153638203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,64,128,1,float16,fp8,63,0.020954666038354237
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,64,0,1,float16,fp8,63,0.021061333517233532
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,64,128,1,float16,float16,127,0.021269333859284718
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,64,0,1,float16,float16,127,0.021338666478792827
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,64,128,1,float16,fp8,127,0.020853333175182343
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,64,0,1,float16,fp8,127,0.020986666282018025
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,64,0,1,float16,float16,255,0.021146667500336964
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,64,128,1,float16,float16,255,0.021189334491888683
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,64,128,1,float16,fp8,255,0.021029333273569744
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,64,0,1,float16,fp8,255,0.020954666038354237
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,64,128,1,float16,float16,511,0.021375998854637146
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,64,0,1,float16,float16,511,0.027818667391935985
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,64,128,1,float16,fp8,511,0.02093333254257838
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,64,128,1,float16,float16,1,0.02749866743882497
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,64,0,1,float16,fp8,511,0.027242665489514668
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,64,0,1,float16,float16,1,0.028101332485675812
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,64,128,1,float16,fp8,1,0.027077332139015198
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,64,0,1,float16,fp8,1,0.027104000250498455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,64,128,1,float16,float16,3,0.0281333327293396
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,64,0,1,float16,float16,3,0.02755733331044515
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,64,128,1,float16,fp8,3,0.026693334182103474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,64,0,1,float16,fp8,3,0.027141332626342773
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,64,128,1,float16,float16,7,0.027237333357334137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,64,0,1,float16,float16,7,0.027434666951497395
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,64,128,1,float16,fp8,7,0.027024000883102417
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,64,0,1,float16,fp8,7,0.027119999130566914
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,64,128,1,float16,float16,15,0.02775999903678894
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,64,0,1,float16,float16,15,0.027477333943049114
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,64,128,1,float16,fp8,15,0.02630399912595749
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,64,0,1,float16,fp8,15,0.0271573339899381
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,64,128,1,float16,float16,31,0.027210667729377747
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,64,0,1,float16,float16,31,0.027461332579453785
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,64,128,1,float16,fp8,31,0.027162666122118633
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,64,0,1,float16,fp8,31,0.027077332139015198
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,64,128,1,float16,float16,63,0.02739199995994568
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,64,0,1,float16,float16,63,0.028421332438786823
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,64,128,1,float16,fp8,63,0.02712533374627431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,64,0,1,float16,fp8,63,0.0264533335963885
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,64,128,1,float16,float16,127,0.027509334186712902
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,64,0,1,float16,float16,127,0.02734400083621343
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,64,128,1,float16,fp8,127,0.027221334477265675
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,64,0,1,float16,fp8,127,0.026677332818508148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,64,128,1,float16,float16,255,0.029077333708604176
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,64,0,1,float16,float16,255,0.0284853329261144
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,64,128,1,float16,fp8,255,0.02719466636578242
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,64,0,1,float16,fp8,255,0.027317332724730175
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,64,128,1,float16,float16,511,0.028138667345046997
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,64,0,1,float16,float16,511,0.04138133426507314
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,64,128,1,float16,fp8,511,0.02720533311367035
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,64,128,1,float16,float16,1,0.044165333112080894
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,64,0,1,float16,fp8,511,0.03729599962631861
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,64,0,1,float16,float16,1,0.04381333291530609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,64,128,1,float16,fp8,1,0.04152533411979675
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,64,0,1,float16,fp8,1,0.04182933270931244
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,64,128,1,float16,float16,3,0.04380266865094503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,64,0,1,float16,float16,3,0.04381333291530609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,64,128,1,float16,fp8,3,0.04154666761557261
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,64,0,1,float16,fp8,3,0.04146666576464971
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,64,128,1,float16,float16,7,0.043893332282702126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,64,0,1,float16,float16,7,0.043840001026789345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,64,128,1,float16,fp8,7,0.041637333730856575
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,64,0,1,float16,fp8,7,0.04144000013669332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,64,128,1,float16,float16,15,0.04381333291530609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,64,0,1,float16,float16,15,0.04381866753101349
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,64,128,1,float16,fp8,15,0.04138666639725367
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,64,0,1,float16,fp8,15,0.041482667128245033
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,64,128,1,float16,float16,31,0.04390933116277059
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,64,0,1,float16,float16,31,0.04552533229192098
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,64,128,1,float16,fp8,31,0.04159466673930486
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,64,128,1,float16,float16,63,0.04548799991607666
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,64,0,1,float16,fp8,31,0.041573333243529
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,64,0,1,float16,float16,63,0.04388799766699473
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,64,128,1,float16,fp8,63,0.04172799984614054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,64,0,1,float16,fp8,63,0.041589332123597465
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,64,128,1,float16,float16,127,0.043765331308046974
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,64,0,1,float16,float16,127,0.045519997676213585
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,64,128,1,float16,fp8,127,0.04142399877309799
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,64,0,1,float16,fp8,127,0.04181866844495138
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,64,128,1,float16,float16,255,0.04563199977080027
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,64,0,1,float16,float16,255,0.04594666759173075
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,64,128,1,float16,fp8,255,0.04257066547870636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,64,0,1,float16,fp8,255,0.04188266893227895
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,64,128,1,float16,float16,511,0.0458133320013682
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,64,0,1,float16,float16,511,0.06974933544794719
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,64,128,1,float16,fp8,511,0.04242133100827535
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,64,0,1,float16,fp8,511,0.0628959983587265
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,64,0,1,float16,float16,1,0.07834133505821228
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,64,128,1,float16,float16,1,0.07861333092053731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,64,128,1,float16,fp8,1,0.07216533521811168
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,64,0,1,float16,fp8,1,0.07232533395290375
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,64,128,1,float16,float16,3,0.07861333092053731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,64,0,1,float16,float16,3,0.07875200112660725
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,64,128,1,float16,fp8,3,0.07214933137098949
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,64,0,1,float16,fp8,3,0.07250666618347168
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,64,0,1,float16,float16,7,0.07839466631412506
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,64,128,1,float16,float16,7,0.07825600107510884
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,64,128,1,float16,fp8,7,0.0722453345855077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,64,0,1,float16,fp8,7,0.07220800220966339
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,64,128,1,float16,float16,15,0.07854400078455608
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,64,0,1,float16,float16,15,0.07896000146865845
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,64,128,1,float16,fp8,15,0.07223466535409291
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,64,0,1,float16,fp8,15,0.0724426656961441
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,64,128,1,float16,float16,31,0.0784746656815211
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,64,0,1,float16,float16,31,0.07832533121109009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,64,128,1,float16,fp8,31,0.07247999807198842
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,64,0,1,float16,fp8,31,0.07226666808128357
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,64,128,1,float16,float16,63,0.07863999903202057
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,64,0,1,float16,float16,63,0.07904000083605449
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,64,128,1,float16,fp8,63,0.07215466598669688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,64,0,1,float16,fp8,63,0.07248533268769582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,64,128,1,float16,float16,127,0.08018133540948232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,64,0,1,float16,float16,127,0.08040533463160197
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,64,128,1,float16,fp8,127,0.07226133346557617
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,64,0,1,float16,fp8,127,0.07228266696135204
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,64,128,1,float16,float16,255,0.08235733211040497
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,64,0,1,float16,float16,255,0.08028266827265422
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,64,0,1,float16,fp8,255,0.07427200178305308
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,64,128,1,float16,fp8,255,0.07428266604741414
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,64,128,1,float16,float16,511,0.08257066706816356
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,64,0,1,float16,float16,511,0.1237600048383077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,64,128,1,float16,fp8,511,0.07443733513355255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,64,0,1,float16,fp8,511,0.11105066537857056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,1,64,128,1,float16,float16,1,0.033701332906881966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,1,64,0,1,float16,float16,1,0.03395200024048487
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,1,64,128,1,float16,fp8,1,0.03329066683848699
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,1,64,128,1,float16,float16,3,0.0341386670867602
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,1,64,0,1,float16,float16,3,0.033626665671666466
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,1,64,128,1,float16,fp8,3,0.03332799921433131
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,1,64,0,1,float16,fp8,3,0.03153600047032038
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,1,64,128,1,float16,float16,7,0.033786666889985405
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,1,64,0,1,float16,float16,7,0.03348266581694285
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,1,64,128,1,float16,fp8,7,0.031514666974544525
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,1,64,0,1,float16,fp8,7,0.03323733309904734
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,1,64,128,1,float16,float16,15,0.03358400116364161
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,1,64,0,1,float16,float16,15,0.033471999069054924
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,1,64,128,1,float16,fp8,15,0.03324266771475474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,1,64,0,1,float16,fp8,15,0.03179199993610382
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,1,64,128,1,float16,float16,31,0.03359466542800268
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,1,64,0,1,float16,float16,31,0.03368533402681351
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,1,64,128,1,float16,fp8,31,0.033173332611719765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,1,64,0,1,float16,fp8,31,0.03340800106525421
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,1,64,128,1,float16,float16,63,0.033589333295822144
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,1,64,0,1,float16,float16,63,0.03338133295377096
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,1,64,128,1,float16,fp8,63,0.031530665854612984
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,1,64,0,1,float16,fp8,63,0.03327466547489166
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,1,64,128,1,float16,float16,127,0.033514666060606636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,1,64,0,1,float16,float16,127,0.034490667283535004
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,1,64,128,1,float16,fp8,127,0.03344533344109853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,1,64,0,1,float16,fp8,127,0.031957333286603294
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,1,64,0,1,float16,fp8,1,0.03311999887228012
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,1,64,128,1,float16,float16,255,0.03356266766786575
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,1,64,0,1,float16,float16,255,0.0337119996547699
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,1,64,128,1,float16,fp8,255,0.033600000043710075
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,2,64,128,1,float16,float16,1,0.04756266872088114
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,1,64,0,1,float16,fp8,255,0.031530665854612984
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,2,64,0,1,float16,float16,1,0.04782933493455251
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,2,64,128,1,float16,fp8,1,0.04390933116277059
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,2,64,0,1,float16,fp8,1,0.04456533491611481
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,2,64,128,1,float16,float16,3,0.04763199885686239
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,2,64,0,1,float16,float16,3,0.0468800018231074
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,2,64,128,1,float16,fp8,3,0.045312002301216125
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,2,64,0,1,float16,fp8,3,0.044213334719340004
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,2,64,128,1,float16,float16,7,0.04725866516431173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,2,64,0,1,float16,float16,7,0.047728002071380615
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,2,64,128,1,float16,fp8,7,0.04393066465854645
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,2,64,0,1,float16,fp8,7,0.04383466641108195
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,2,64,128,1,float16,float16,15,0.047482664386431374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,2,64,0,1,float16,float16,15,0.046629334489504494
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,2,64,128,1,float16,fp8,15,0.043882668018341064
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,2,64,0,1,float16,fp8,15,0.04413333535194397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,2,64,128,1,float16,float16,31,0.04693333307902018
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,2,64,0,1,float16,float16,31,0.047775998711586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,2,64,128,1,float16,fp8,31,0.04382933179537455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,2,64,0,1,float16,fp8,31,0.0436106671889623
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,2,64,128,1,float16,float16,63,0.047653332352638245
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,2,64,0,1,float16,float16,63,0.0468800018231074
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,2,64,128,1,float16,fp8,63,0.04387733340263367
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,2,64,0,1,float16,fp8,63,0.04427200059096018
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,2,64,128,1,float16,float16,127,0.04744533201058706
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,2,64,0,1,float16,float16,127,0.047744000951449074
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,2,64,128,1,float16,fp8,127,0.04373333354791006
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,2,64,0,1,float16,fp8,127,0.043765331308046974
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,2,64,128,1,float16,float16,255,0.04769066472848257
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,2,64,0,1,float16,float16,255,0.04900800188382467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,2,64,128,1,float16,fp8,255,0.04558933277924856
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,4,64,128,1,float16,float16,1,0.08030933141708374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,2,64,0,1,float16,fp8,255,0.044826666514078774
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,4,64,0,1,float16,float16,1,0.08040533463160197
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,4,64,128,1,float16,fp8,1,0.07462933162848155
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,4,64,0,1,float16,fp8,1,0.07448000212510426
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,4,64,128,1,float16,float16,3,0.0809386670589447
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,4,64,0,1,float16,float16,3,0.08058133224646251
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,4,64,128,1,float16,fp8,3,0.07454933226108551
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,4,64,0,1,float16,fp8,3,0.07417599856853485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,4,64,128,1,float16,float16,7,0.08040000001589458
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,4,64,0,1,float16,float16,7,0.08050666749477386
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,4,64,128,1,float16,fp8,7,0.07457066575686137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,4,64,0,1,float16,fp8,7,0.0747573326031367
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,4,64,128,1,float16,float16,15,0.08072533210118611
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,4,64,0,1,float16,float16,15,0.08067200084527333
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,4,64,128,1,float16,fp8,15,0.07436800003051758
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,4,64,0,1,float16,fp8,15,0.07429333527882893
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,4,64,128,1,float16,float16,31,0.0803413341442744
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,4,64,0,1,float16,float16,31,0.08054399987061818
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,4,64,128,1,float16,fp8,31,0.0745119998852412
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,4,64,0,1,float16,fp8,31,0.07489066819349925
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,4,64,128,1,float16,fp8,63,0.07458133498827617
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,4,64,128,1,float16,float16,127,0.08176533381144206
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,4,64,0,1,float16,float16,127,0.08259200056393941
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,4,64,0,1,float16,fp8,127,0.0745119998852412
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,4,64,128,1,float16,float16,255,0.08389866352081299
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,4,64,0,1,float16,float16,255,0.08247466882069905
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,4,64,128,1,float16,fp8,255,0.07644799848397572
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,4,64,0,1,float16,fp8,255,0.07726400097211202
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,8,64,128,1,float16,float16,1,0.14870933691660562
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,8,64,0,1,float16,float16,1,0.14801599582036337
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,8,64,128,1,float16,fp8,1,0.13580800096193948
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,8,64,0,1,float16,fp8,1,0.13569600383440653
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,8,64,128,1,float16,float16,3,0.14841066797574362
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,4,64,128,1,float16,float16,63,0.08074133098125458
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,8,64,128,1,float16,fp8,3,0.13577066858609518
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,8,64,0,1,float16,fp8,3,0.13555733362833658
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,4,64,0,1,float16,fp8,63,0.07433600227038066
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,4,64,0,1,float16,float16,63,0.08037866652011871
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,4,64,128,1,float16,fp8,127,0.0744053324063619
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,8,64,0,1,float16,float16,7,0.14991999665896097
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,8,64,128,1,float16,fp8,7,0.13577600320180258
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,8,64,0,1,float16,fp8,7,0.1357439955075582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,8,64,128,1,float16,float16,15,0.1497173309326172
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,8,64,0,1,float16,float16,15,0.150026669104894
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,8,64,128,1,float16,fp8,15,0.1356106698513031
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,8,64,0,1,float16,fp8,15,0.1360106666882833
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,8,64,128,1,float16,float16,31,0.14994133512179056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,8,64,0,1,float16,float16,31,0.15005866686503092
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,8,64,128,1,float16,fp8,31,0.13567466537157694
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,8,64,0,1,float16,fp8,31,0.13593066732088724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,8,64,128,1,float16,float16,63,0.14949867129325867
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,8,64,0,1,float16,float16,63,0.14824533462524414
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,8,64,128,1,float16,fp8,63,0.1360213359196981
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,8,64,0,1,float16,float16,3,0.14825066924095154
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,8,64,0,1,float16,fp8,63,0.13567466537157694
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,8,64,128,1,float16,float16,127,0.1482186714808146
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,8,64,128,1,float16,float16,7,0.14838932951291403
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,8,64,0,1,float16,float16,127,0.14833066860834757
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,float16,1,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,float16,1,0.009082666908701261
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,8,64,128,1,float16,fp8,127,0.13737066586812338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,8,64,0,1,float16,fp8,127,0.13768532872200012
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,8,64,128,1,float16,float16,255,0.15265599886576334
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,fp8,1,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,float16,3,0.010559999694426855
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,fp8,1,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,8,64,0,1,float16,float16,255,0.1483733355998993
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,8,64,128,1,float16,fp8,255,0.1418506701787313
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,float16,3,0.009183999771873156
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,fp8,3,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,8,64,0,1,float16,fp8,255,0.1357493301232656
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,fp8,3,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,float16,7,0.010389333590865135
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,float16,7,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,fp8,7,0.01091733326514562
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,float16,31,0.010410666465759277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,fp8,7,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,float16,15,0.009248000259200731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,float16,15,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,fp8,15,0.010149333626031876
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,fp8,15,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,float16,31,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,fp8,31,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,fp8,31,0.011018666128317514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,float16,127,0.009328000247478485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,float16,63,0.010656000425418219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,float16,63,0.009418666362762451
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,fp8,63,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,fp8,63,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,float16,127,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,fp8,127,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,fp8,127,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,float16,255,0.009712000067035357
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,float16,255,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,fp8,255,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,fp8,255,0.010981333752473196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,float16,511,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,float16,511,0.010928000013033548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,fp8,511,0.011071999867757162
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,fp8,511,0.011066666493813196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,float16,1023,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,float16,1023,0.011018666128317514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,fp8,1023,0.010901333143313726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,fp8,1023,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,float16,2047,0.012815999488035837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,float16,2047,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,fp8,2047,0.012810666114091873
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,fp8,2047,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,float16,4095,0.012826666235923767
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,float16,4095,0.01718933383623759
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,fp8,4095,0.012757333616415659
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,fp8,4095,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,float16,8191,0.012917333592971167
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,float16,8191,0.018853332847356796
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,fp8,8191,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,fp8,8191,0.018981333822011948
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,float16,16383,0.012800000607967377
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,float16,16383,0.02292799949645996
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,fp8,16383,0.012815999488035837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,fp8,16383,0.021055998901526134
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,fp8,32767,0.012842666357755661
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,float16,32767,0.02824000020821889
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,float16,32767,0.012784000486135483
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,float16,1,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,float16,1,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,fp8,32767,0.027029333015282948
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,fp8,1,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,float16,65535,0.01292266696691513
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,fp8,1,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,float16,3,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,float16,65535,0.0448586642742157
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,float16,3,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,fp8,3,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,fp8,7,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,fp8,7,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,64,128,1,float16,fp8,65535,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,fp8,3,0.01097600037852923
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,64,0,1,float16,fp8,65535,0.03550933301448822
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,float16,7,0.009098666409651438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,float16,7,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,float16,31,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,float16,15,0.010602666685978571
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,float16,15,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,fp8,31,0.010570666442314783
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,fp8,15,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,float16,63,0.00919999989370505
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,fp8,15,0.0106133334338665
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,float16,31,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,fp8,31,0.009754666437705358
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,float16,63,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,fp8,63,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,fp8,63,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,float16,255,0.00898133342464765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,float16,127,0.009663999701539675
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,float16,127,0.010901333143313726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,fp8,127,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,fp8,127,0.009392000113924345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,float16,511,0.011066666493813196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,fp8,511,0.01121066634853681
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,float16,255,0.010597333312034607
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,fp8,255,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,float16,1023,0.010970667004585266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,fp8,255,0.00927466650803884
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,float16,511,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,fp8,511,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,float16,1023,0.011018666128317514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,fp8,2047,0.012693333129088083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,fp8,1023,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,fp8,1023,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,float16,2047,0.012863999853531519
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,float16,2047,0.013088000317414602
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,fp8,2047,0.014815999815861383
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,float16,4095,0.012975999464591345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,float16,4095,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,fp8,4095,0.012842666357755661
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,fp8,4095,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,float16,8191,0.012879999975363413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,float16,8191,0.018826667219400406
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,fp8,16383,0.012890666723251343
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,fp8,8191,0.01293333371480306
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,fp8,8191,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,float16,16383,0.012634667257467905
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,float16,16383,0.02386666586001714
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,fp8,16383,0.02233600119749705
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,float16,32767,0.012890666723251343
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,float16,32767,0.04068266600370407
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,fp8,32767,0.012965332716703415
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,fp8,32767,0.03162133445342382
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,float16,1,0.008938666433095932
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,float16,1,0.009130666653315226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,fp8,1,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,fp8,1,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,fp8,65535,0.01302933320403099
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,float16,3,0.009072000160813332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,64,128,1,float16,float16,65535,0.012949333836634954
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,float16,3,0.009706666693091393
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,fp8,3,0.0106133334338665
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,float16,65535,0.06283199787139893
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,fp8,3,0.010464000205198923
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,64,0,1,float16,fp8,65535,0.05392533540725708
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,float16,7,0.010490667074918747
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,float16,7,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,fp8,7,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,fp8,7,0.010885333021481832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,float16,15,0.009354666496316591
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,float16,15,0.009045333291093508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,fp8,15,0.010128000130256018
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,fp8,15,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,float16,31,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,float16,31,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,fp8,31,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,fp8,31,0.009861333295702934
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,float16,63,0.009589333087205887
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,float16,63,0.009141333401203156
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,fp8,127,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,fp8,63,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,float16,255,0.009050666665037474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,fp8,63,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,float16,127,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,float16,127,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,fp8,127,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,float16,255,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,fp8,255,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,fp8,255,0.009839999799927076
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,float16,1023,0.012800000607967377
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,fp8,1023,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,float16,511,0.010608000059922537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,float16,511,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,fp8,511,0.011130666981140772
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,fp8,511,0.01116266722480456
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,float16,1023,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,fp8,1023,0.011109333485364914
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,float16,2047,0.013082666943470636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,float16,2047,0.015578666081031164
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,fp8,2047,0.013023999830087027
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,fp8,2047,0.015392000476519266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,float16,4095,0.012821332861979803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,float16,4095,0.018954666952292126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,fp8,4095,0.012741333494583765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,fp8,4095,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,float16,8191,0.01303999995191892
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,float16,8191,0.025253333151340485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,fp8,8191,0.012997332960367203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,fp8,8191,0.023034666975339253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,float16,16383,0.013007999708255133
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,float16,16383,0.04144000013669332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,fp8,16383,0.012842666357755661
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,fp8,16383,0.03143999973932902
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,float16,32767,0.06371200084686279
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,float16,32767,0.013023999830087027
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,fp8,32767,0.012991999586423239
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,fp8,32767,0.05412800113360087
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,float16,1,0.010645333677530289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,float16,1,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,fp8,1,0.010970667004585266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,fp8,1,0.010992000500361124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,float16,3,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,float16,65535,0.013013333082199097
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,float16,3,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,fp8,3,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,fp8,3,0.01118933285276095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,float16,65535,0.1090773344039917
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,64,128,1,float16,fp8,65535,0.012906666845083237
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,float16,7,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,64,0,1,float16,fp8,65535,0.09052800138791402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,float16,7,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,fp8,7,0.01089599976936976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,fp8,7,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,float16,15,0.010522666076819101
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,float16,15,0.011055999745925268
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,fp8,15,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,fp8,15,0.010602666685978571
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,float16,31,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,float16,31,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,fp8,31,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,fp8,31,0.011109333485364914
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,float16,63,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,float16,63,0.010586666564146677
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,fp8,127,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,fp8,127,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,fp8,63,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,fp8,63,0.011077333241701126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,float16,127,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,float16,127,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,float16,255,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,float16,255,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,fp8,511,0.01090666651725769
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,fp8,255,0.010992000500361124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,fp8,255,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,fp8,1023,0.01091733326514562
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,float16,511,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,float16,511,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,fp8,511,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,float16,1023,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,fp8,2047,0.017445333302021027
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,float16,1023,0.013066666821638743
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,fp8,1023,0.012810666114091873
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,float16,2047,0.013141332815090815
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,float16,2047,0.018079999834299088
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,fp8,2047,0.012757333616415659
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,float16,4095,0.012890666723251343
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,float16,4095,0.023157333334287006
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,fp8,4095,0.012890666723251343
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,fp8,4095,0.022474666436513264
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,float16,16383,0.012869333227475485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,float16,8191,0.012698666503032049
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,float16,8191,0.03939199944337209
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,fp8,8191,0.013189333180586496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,fp8,8191,0.033071999748547874
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,float16,16383,0.061573331554730736
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,fp8,16383,0.012869333227475485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,fp8,16383,0.05407999952634176
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,float16,32767,0.013013333082199097
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,fp8,32767,0.013114667187134424
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,float16,32767,0.10520533720652263
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,fp8,32767,0.09236799677213033
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,float16,1,0.010954666882753372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,fp8,1,0.010629333555698395
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,float16,65535,0.012997332960367203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,float16,3,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,fp8,7,0.01101333275437355
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,64,128,1,float16,fp8,65535,0.012965332716703415
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,fp8,65535,0.16837332646052042
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,fp8,3,0.011312000453472137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,float16,7,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,float16,63,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,64,0,1,float16,float16,65535,0.19631467262903848
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,float16,127,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,float16,15,0.010650667051474253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,fp8,15,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,float16,255,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,float16,31,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,fp8,31,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,fp8,63,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,fp8,1023,0.013007999708255133
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,fp8,127,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,fp8,255,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,float16,511,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,fp8,511,0.011061333119869232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,float16,1023,0.011141333729028702
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,float16,2047,0.016389333953460056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,fp8,8191,0.021130666136741638
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,fp8,2047,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,float16,4095,0.018954666952292126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,fp8,4095,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,float16,8191,0.023061332603295643
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,float16,16383,0.040133332212766014
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,fp8,16383,0.027290667096773785
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,float16,1,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,float16,32767,0.06182933350404104
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,fp8,1,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,float16,3,0.01007466639081637
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,fp8,3,0.010640000303586325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,float16,7,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,fp8,7,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,float16,15,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,1,128,0,1,float16,fp8,32767,0.0444213350613912
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,fp8,15,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,float16,127,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,float16,31,0.009695999945203463
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,float16,255,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,fp8,31,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,float16,63,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,fp8,63,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,fp8,127,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,fp8,255,0.010634666929642359
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,float16,511,0.011098666737476984
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,fp8,511,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,float16,1023,0.013077333569526672
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,fp8,1023,0.01250133290886879
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,float16,2047,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,float16,8191,0.03942933430274328
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,fp8,2047,0.017082666357358296
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,float16,4095,0.02103466788927714
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,fp8,4095,0.019215999792019527
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,fp8,8191,0.0249493345618248
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,float16,16383,0.05962666869163513
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,float16,1,0.01098666712641716
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,fp8,16383,0.039893334110577904
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,fp8,1,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,float16,3,0.010629333555698395
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,float16,32767,0.10150933265686035
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,fp8,3,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,float16,7,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,fp8,7,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,2,128,0,1,float16,fp8,32767,0.0628053347269694
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,float16,15,0.010911999891201654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,fp8,15,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,float16,31,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,fp8,31,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,float16,63,0.011055999745925268
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,fp8,63,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,fp8,511,0.01108266661564509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,float16,127,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,fp8,127,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,float16,255,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,fp8,255,0.010618666807810465
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,float16,511,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,float16,1023,0.013477332890033722
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,fp8,1023,0.01268799975514412
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,float16,2047,0.019365333020687103
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,fp8,2047,0.018063999712467194
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,float16,4095,0.038319999972979225
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,fp8,4095,0.023423999547958374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,float16,8191,0.0591893345117569
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,fp8,8191,0.0391146664818128
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,float16,16383,0.10139200091362
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,float16,3,0.012752000242471695
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,fp8,16383,0.059893334905306496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,float16,1,0.012842666357755661
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,fp8,1,0.011055999745925268
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,float16,15,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,fp8,3,0.011509332805871964
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,float16,32767,0.1835093299547831
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,4,128,0,1,float16,fp8,32767,0.10206933816274007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,float16,7,0.012773333738247553
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,fp8,7,0.01209066684047381
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,fp8,15,0.012831999609867731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,fp8,127,0.012949333836634954
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,float16,255,0.012746666868527731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,float16,31,0.012762666990359625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,fp8,31,0.011114666859308878
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,float16,63,0.012789333860079447
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,fp8,63,0.012261333564917246
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,float16,127,0.012853333105643591
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,fp8,255,0.012565333396196365
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,float16,511,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,fp8,511,0.013066666821638743
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,float16,1023,0.018901333212852478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,fp8,1023,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,float16,2047,0.0393653338154157
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,fp8,2047,0.02508266766866048
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,float16,4095,0.06014933188756307
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,fp8,4095,0.0403466671705246
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,float16,8191,0.10237866640090942
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,fp8,8191,0.06187733511130015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,fp8,1,0.011381333072980246
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,fp8,16383,0.10316266616185506
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,fp8,3,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,float16,16383,0.18651199340820312
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,float16,1,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,float16,3,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,fp8,32767,0.18888533115386963
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,float16,7,0.011242666592200598
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,16,8,128,0,1,float16,float16,32767,0.3573919932047526
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,fp8,7,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,float16,15,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,fp8,15,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,float16,31,0.010629333555698395
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,float16,63,0.011066666493813196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,fp8,31,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,fp8,63,0.010901333143313726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,float16,127,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,fp8,127,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,float16,255,0.009098666409651438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,fp8,255,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,float16,511,0.011061333119869232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,fp8,511,0.011519999553759893
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,float16,1023,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,fp8,1023,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,float16,2047,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,fp8,2047,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,float16,4095,0.011871999750534693
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,fp8,4095,0.012981332838535309
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,float16,8191,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,fp8,8191,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,float16,16383,0.02107733239730199
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,fp8,16383,0.019178666174411774
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,float16,32767,0.02924799919128418
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,fp8,32767,0.027424000203609467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,float16,1,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,fp8,1,0.00914666677514712
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,float16,3,0.010618666807810465
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,fp8,3,0.009018666421373686
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,fp8,65535,0.03195200115442276
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,float16,65535,0.032138665517171226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,float16,7,0.010037333394090334
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,fp8,7,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,float16,15,0.008826666822036108
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,fp8,15,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,float16,63,0.011434666812419891
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,fp8,31,0.010522666076819101
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,float16,31,0.011578666667143503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,fp8,63,0.010608000059922537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,float16,131071,0.036618667344252266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,float16,127,0.00903466654320558
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,1,128,0,1,float16,fp8,131071,0.035546667873859406
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,fp8,127,0.011136000355084738
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,float16,255,0.010928000013033548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,fp8,255,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,float16,511,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,fp8,511,0.011018666128317514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,float16,1023,0.01098666712641716
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,fp8,1023,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,float16,2047,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,fp8,2047,0.011146667102972666
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,float16,4095,0.012730666746695837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,fp8,4095,0.012853333105643591
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,float16,8191,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,float16,32767,0.0229120006163915
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,fp8,8191,0.016805333395799
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,float16,16383,0.01941866676012675
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,fp8,16383,0.019237333287795384
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,fp8,32767,0.023071999351183575
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,float16,1,0.010559999694426855
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,fp8,1,0.009653333574533463
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,float16,65535,0.025418666501839954
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,float16,3,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,fp8,65535,0.02370133250951767
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,fp8,3,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,float16,7,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,fp8,7,0.009893333539366722
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,float16,15,0.010890666395425797
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,fp8,15,0.010618666807810465
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,float16,131071,0.02962133288383484
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,float16,31,0.008986666798591614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,2,128,0,1,float16,fp8,131071,0.02741866558790207
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,fp8,31,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,float16,63,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,fp8,63,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,float16,127,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,fp8,127,0.010933333386977514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,float16,255,0.009946666657924652
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,fp8,255,0.008976000050703684
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,float16,511,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,fp8,511,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,float16,1023,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,fp8,1023,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,fp8,8191,0.015615999698638916
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,float16,2047,0.010890666395425797
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,fp8,2047,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,float16,4095,0.012874666601419449
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,fp8,4095,0.010928000013033548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,float16,8191,0.01681600014368693
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,float16,16383,0.019258666783571243
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,fp8,16383,0.01903466631968816
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,float16,32767,0.020981334149837494
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,fp8,32767,0.01929066702723503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,float16,1,0.009653333574533463
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,fp8,65535,0.023946667710940044
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,float16,65535,0.02316266546646754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,fp8,1,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,float16,3,0.00980266680320104
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,fp8,3,0.009082666908701261
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,float16,7,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,fp8,7,0.01098666712641716
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,float16,15,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,fp8,15,0.010464000205198923
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,float16,131071,0.042597333590189614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,float16,31,0.009178666397929192
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,4,128,0,1,float16,fp8,131071,0.02773333340883255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,fp8,31,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,float16,63,0.010645333677530289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,fp8,63,0.009162666896979014
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,float16,127,0.009701333319147428
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,fp8,127,0.00903466654320558
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,float16,255,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,fp8,1023,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,fp8,2047,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,fp8,255,0.010640000303586325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,float16,511,0.010992000500361124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,fp8,511,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,float16,8191,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,float16,1023,0.010992000500361124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,float16,2047,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,float16,4095,0.015024000157912573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,fp8,4095,0.013034666577974955
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,fp8,8191,0.015392000476519266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,float16,16383,0.018751999984184902
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,fp8,16383,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,float16,32767,0.022592000663280487
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,fp8,32767,0.01923199991385142
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,float16,1,0.010570666442314783
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,fp8,1,0.009610666582981745
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,float16,3,0.00914666677514712
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,float16,65535,0.038975998759269714
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,fp8,65535,0.02514133354028066
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,fp8,3,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,float16,7,0.009119999905427298
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,fp8,7,0.010640000303586325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,float16,15,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,fp8,15,0.009850666547815004
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,float16,31,0.008943999807039896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,fp8,31,0.00955200009047985
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,fp8,131071,0.04116799930731455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,float16,63,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,16,8,128,0,1,float16,float16,131071,0.06011733412742615
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,fp8,255,0.010128000130256018
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,fp8,63,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,float16,127,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,fp8,127,0.009317333499590555
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,float16,255,0.009114666531483332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,float16,511,0.011066666493813196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,fp8,511,0.011530666301647821
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,float16,1023,0.010965333630641302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,fp8,1023,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,float16,2047,0.012853333105643591
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,fp8,2047,0.012778667112191519
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,float16,4095,0.012741333494583765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,fp8,16383,0.02107200026512146
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,fp8,4095,0.012906666845083237
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,float16,8191,0.01724799970785777
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,fp8,8191,0.01718933383623759
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,float16,16383,0.022341333329677582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,float16,32767,0.025034666061401367
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,fp8,32767,0.023472001155217487
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,float16,1,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,fp8,1,0.010656000425418219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,float16,65535,0.027210667729377747
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,fp8,65535,0.025450666745503742
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,float16,3,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,float16,15,0.009045333291093508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,fp8,3,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,float16,7,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,fp8,7,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,fp8,15,0.0100426667680343
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,float16,127,0.008890666688481966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,float16,131071,0.03229333211978277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,float16,31,0.010213333492477735
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,fp8,31,0.00895999992887179
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,1,128,0,1,float16,fp8,131071,0.029690665503342945
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,float16,63,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,fp8,63,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,float16,1023,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,fp8,127,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,float16,255,0.008863999818762144
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,fp8,255,0.01126933346192042
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,float16,511,0.011125333607196808
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,fp8,511,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,fp8,1023,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,float16,2047,0.011130666981140772
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,fp8,2047,0.011141333729028702
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,float16,4095,0.012757333616415659
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,fp8,16383,0.01922133316596349
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,fp8,4095,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,float16,8191,0.01710933322707812
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,fp8,8191,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,float16,16383,0.01907733331123988
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,float16,32767,0.021322667598724365
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,fp8,32767,0.019285333653291065
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,float16,1,0.011343999455372492
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,fp8,1,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,float16,65535,0.02518400053183238
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,fp8,65535,0.023056000471115112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,float16,3,0.009296000003814697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,fp8,3,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,float16,15,0.009077333534757296
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,float16,7,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,fp8,7,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,fp8,131071,0.029301332930723827
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,2,128,0,1,float16,float16,131071,0.041834667325019836
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,fp8,15,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,float16,31,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,fp8,31,0.009658666948477427
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,float16,63,0.010645333677530289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,fp8,63,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,float16,127,0.008842666943868002
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,fp8,127,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,float16,255,0.009568000212311745
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,fp8,255,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,float16,511,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,fp8,511,0.010650667051474253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,float16,4095,0.015002666662136713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,float16,1023,0.010922666639089584
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,fp8,1023,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,float16,2047,0.011055999745925268
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,fp8,2047,0.01098666712641716
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,fp8,4095,0.014826666563749313
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,fp8,16383,0.017231999586025875
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,float16,8191,0.017114666601022083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,fp8,8191,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,float16,16383,0.01924266666173935
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,float16,32767,0.02329600105683009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,fp8,32767,0.020970667401949566
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,float16,1,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,fp8,1,0.00955200009047985
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,float16,65535,0.03977599988381068
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,float16,3,0.01091733326514562
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,fp8,3,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,float16,7,0.009674666449427605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,fp8,65535,0.025968000292778015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,fp8,7,0.010901333143313726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,float16,15,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,fp8,15,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,float16,31,0.010970667004585266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,fp8,31,0.010399999717871347
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,fp8,131071,0.04402133325735728
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,float16,63,0.010922666639089584
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,fp8,63,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,4,128,0,1,float16,float16,131071,0.061568001906077065
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,float16,127,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,fp8,127,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,float16,255,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,fp8,255,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,float16,511,0.01108266661564509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,fp8,511,0.010890666395425797
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,float16,1023,0.010965333630641302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,fp8,1023,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,float16,2047,0.014938666174809137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,fp8,2047,0.014981333166360855
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,float16,4095,0.0163680004576842
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,fp8,16383,0.019296000401178997
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,fp8,4095,0.014954666296641031
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,float16,8191,0.01820266619324684
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,fp8,8191,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,float16,16383,0.02242133269707362
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,float16,32767,0.03766400118668874
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,fp8,32767,0.02536533276240031
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,float16,1,0.01190399999419848
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,fp8,1,0.010885333021481832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,float16,65535,0.05710400144259135
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,float16,3,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,fp8,65535,0.037647999823093414
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,fp8,3,0.010901333143313726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,fp8,131071,0.05886933207511902
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,fp8,7,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,float16,7,0.011173332730929056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,float16,15,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,16,8,128,0,1,float16,float16,131071,0.09819733103116353
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,fp8,15,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,float16,31,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,fp8,31,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,float16,63,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,fp8,63,0.01098666712641716
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,float16,127,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,fp8,127,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,float16,255,0.010938666760921478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,fp8,255,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,float16,511,0.011098666737476984
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,fp8,511,0.012762666990359625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,float16,1023,0.013002666334311167
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,fp8,1023,0.013061333447694778
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,float16,2047,0.019189332922299702
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,fp8,2047,0.018005333840847015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,float16,4095,0.02160000056028366
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,fp8,4095,0.020928000410397846
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,fp8,1,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,float16,8191,0.039733332892258964
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,fp8,8191,0.027274665733178455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,float16,1,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,float16,16383,0.060559997955958046
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,float16,3,0.010911999891201654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,1,128,0,1,float16,fp8,16383,0.044106667240460716
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,fp8,3,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,float16,7,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,fp8,7,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,float16,15,0.011285333583752314
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,fp8,15,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,float16,31,0.011183999478816986
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,fp8,31,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,float16,63,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,fp8,63,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,float16,511,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,float16,127,0.011168000598748526
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,float16,1023,0.012874666601419449
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,fp8,127,0.010949333508809408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,float16,255,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,fp8,255,0.010954666882753372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,fp8,511,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,fp8,1023,0.012741333494583765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,float16,2047,0.021194666624069214
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,fp8,2047,0.018965333700180054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,float16,8191,0.0598880002895991
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,float16,4095,0.038191998998324074
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,float16,1,0.0122079998254776
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,float16,16383,0.10115200281143188
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,fp8,4095,0.023306667804718018
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,float16,3,0.013221333424250284
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,fp8,3,0.011098666737476984
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,fp8,8191,0.03949866692225138
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,fp8,1,0.012928000340859095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,2,128,0,1,float16,fp8,16383,0.06004266440868378
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,float16,7,0.011946666985750198
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,fp8,7,0.012805332740147909
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,float16,63,0.012138667205969492
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,float16,15,0.013295999417702356
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,fp8,15,0.01118933285276095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,float16,31,0.012762666990359625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,fp8,31,0.012757333616415659
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,float16,511,0.014789332946141561
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,fp8,63,0.01101333275437355
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,fp8,127,0.012794667234023413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,float16,127,0.012138667205969492
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,float16,255,0.01097600037852923
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,fp8,255,0.012757333616415659
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,fp8,511,0.012863999853531519
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,float16,1023,0.018842666099468868
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,fp8,4095,0.03977599988381068
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,fp8,1023,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,float16,2047,0.03878933439652125
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,fp8,2047,0.025424001117547352
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,float16,4095,0.060309335589408875
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,float16,8191,0.10179733236630757
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,fp8,8191,0.061978667974472046
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,float16,1,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,fp8,1,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,fp8,7,0.014885333677132925
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,fp8,16383,0.10345066587130229
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,float16,3,0.014864000181357065
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,4,128,0,1,float16,float16,16383,0.18488534291585287
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,fp8,3,0.013242666920026144
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,float16,7,0.01479999969402949
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,float16,15,0.014826666563749313
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,fp8,15,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,float16,31,0.01492799942692121
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,fp8,31,0.013082666943470636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,float16,63,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,fp8,63,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,float16,127,0.015402667224407196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,fp8,127,0.012997332960367203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,float16,255,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,fp8,255,0.014746667196353277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,float16,511,0.019274666905403137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,fp8,511,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,float16,1023,0.03455466777086258
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,fp8,1023,0.0230880007147789
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,float16,2047,0.06041066845258077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,fp8,2047,0.039706667264302574
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,float16,4095,0.10248000423113506
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,fp8,4095,0.06225599845250448
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,float16,8191,0.18692266941070557
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,fp8,8191,0.10467200477917989
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,float16,1,0.01145600030819575
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,fp8,1,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,float16,3,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,fp8,16383,0.18917866547902426
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,fp8,3,0.010570666442314783
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,16,8,128,0,1,float16,float16,16383,0.3558719952901204
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,fp8,31,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,float16,63,0.009984000275532404
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,float16,7,0.009733333562811216
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,fp8,7,0.009642666826645533
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,float16,15,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,fp8,15,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,float16,31,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,fp8,63,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,float16,127,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,fp8,127,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,fp8,1023,0.011120000233252844
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,float16,255,0.009866666669646898
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,fp8,2047,0.01292266696691513
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,fp8,255,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,float16,511,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,fp8,511,0.011071999867757162
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,float16,1023,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,float16,2047,0.013141332815090815
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,float16,4095,0.014837333311637243
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,fp8,4095,0.014757333944241205
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,fp8,16383,0.020848001043001812
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,float16,8191,0.019199999670187633
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,fp8,8191,0.01952533299724261
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,float16,16383,0.02126399924357732
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,float16,32767,0.0232640008131663
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,fp8,32767,0.023103999594847362
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,float16,1,0.010384000216921171
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,fp8,1,0.01062400018175443
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,float16,3,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,float16,65535,0.027130665878454845
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,fp8,65535,0.025114665428797405
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,fp8,3,0.011120000233252844
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,float16,7,0.009648000200589498
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,fp8,7,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,float16,15,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,fp8,15,0.009466666728258133
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,float16,63,0.00960533320903778
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,float16,31,0.011152000476916632
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,fp8,31,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,fp8,63,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,float16,131071,0.04650133351484934
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,float16,127,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,fp8,127,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,float16,255,0.011413333316644033
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,1,128,0,1,float16,fp8,131071,0.032032000521818794
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,fp8,255,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,float16,511,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,fp8,511,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,float16,1023,0.01090666651725769
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,fp8,1023,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,float16,2047,0.013269333789745966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,fp8,2047,0.012847999731699625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,float16,4095,0.01684800038735072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,fp8,4095,0.01685333376129468
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,float16,8191,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,fp8,8191,0.017258666455745697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,float16,16383,0.021066665649414062
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,fp8,16383,0.021029333273569744
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,float16,32767,0.024405332903067272
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,fp8,32767,0.023061332603295643
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,fp8,1,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,float16,3,0.009029333169261614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,float16,1,0.009626666704813639
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,fp8,3,0.010629333555698395
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,float16,7,0.010954666882753372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,float16,65535,0.0421973317861557
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,fp8,7,0.009365333244204521
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,fp8,65535,0.027136000494162243
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,fp8,31,0.010938666760921478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,float16,15,0.010351999973257383
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,fp8,15,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,float16,31,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,float16,63,0.009082666908701261
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,float16,131071,0.06323199967543285
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,fp8,63,0.00949866697192192
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,float16,127,0.008992000172535578
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,fp8,127,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,2,128,0,1,float16,fp8,131071,0.04458666841189066
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,float16,255,0.0102186668664217
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,fp8,255,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,float16,511,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,fp8,511,0.01109333336353302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,float16,1023,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,fp8,1023,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,float16,2047,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,fp8,2047,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,float16,4095,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,fp8,4095,0.01732800031701724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,float16,8191,0.019050666441520054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,fp8,8191,0.017781333376963932
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,float16,16383,0.02314666658639908
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,fp8,16383,0.020879998803138733
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,float16,32767,0.04178133110205332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,float16,65535,0.05914133290449778
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,fp8,32767,0.025349333882331848
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,float16,3,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,float16,1,0.008992000172535578
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,fp8,1,0.010010666524370512
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,fp8,3,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,fp8,65535,0.04067199925581614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,float16,7,0.009232000137368837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,fp8,7,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,float16,15,0.008943999807039896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,float16,63,0.008965333302815756
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,fp8,15,0.010298666854699453
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,float16,31,0.010399999717871347
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,fp8,31,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,float16,131071,0.10074133674303691
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,fp8,63,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,float16,127,0.010933333386977514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,4,128,0,1,float16,fp8,131071,0.06150933106740316
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,fp8,127,0.009583999713261923
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,float16,255,0.009402666861812273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,fp8,255,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,float16,511,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,fp8,2047,0.0145066666106383
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,fp8,511,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,float16,1023,0.011333333949247995
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,fp8,1023,0.01109333336353302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,float16,2047,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,float16,4095,0.01725333308180173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,fp8,4095,0.014885333677132925
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,fp8,16383,0.024160000185171764
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,float16,8191,0.02089066555102666
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,fp8,8191,0.018944000204404194
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,float16,16383,0.037589333951473236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,fp8,32767,0.03956266740957896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,float16,32767,0.05772800246874491
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,float16,1,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,fp8,1,0.01116266722480456
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,float16,65535,0.10139200091362
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,float16,3,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,fp8,3,0.011941333611806234
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,fp8,65535,0.06168533364931742
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,float16,7,0.010949333508809408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,float16,15,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,fp8,7,0.011792000383138657
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,fp8,15,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,float16,31,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,fp8,31,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,float16,63,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,fp8,131071,0.10761066277821858
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,fp8,63,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,16,8,128,0,1,float16,float16,131071,0.1875093380610148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,float16,127,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,fp8,127,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,float16,255,0.011141333729028702
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,fp8,255,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,fp8,1023,0.013178666432698568
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,float16,2047,0.0210506667693456
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,fp8,2047,0.021018666525681812
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,float16,511,0.013093333691358566
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,fp8,511,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,float16,1023,0.014874666929244995
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,float16,4095,0.03942933430274328
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,fp8,4095,0.027087998886903126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,float16,1,0.012693333129088083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,fp8,1,0.013082666943470636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,float16,3,0.013056000073750814
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,float16,8191,0.06000000238418579
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,fp8,3,0.012746666868527731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,1,128,0,1,float16,fp8,8191,0.04375466704368591
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,float16,7,0.012944000462690989
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,fp8,7,0.011055999745925268
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,float16,15,0.012725333372751871
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,fp8,15,0.011813333878914515
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,float16,31,0.012896000097195307
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,fp8,31,0.012837332983811697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,float16,63,0.012730666746695837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,fp8,63,0.011061333119869232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,float16,127,0.012842666357755661
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,fp8,127,0.01157333329319954
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,float16,255,0.012789333860079447
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,fp8,255,0.01146666705608368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,float16,511,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,fp8,511,0.013280000537633896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,float16,1023,0.018885333091020584
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,fp8,1023,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,float16,2047,0.03938133269548416
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,fp8,2047,0.025093334416548412
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,float16,4095,0.060592000683148704
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,fp8,4095,0.041002665956815086
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,float16,1,0.014858666807413101
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,fp8,1,0.01328533391157786
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,fp8,7,0.013023999830087027
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,fp8,8191,0.06221333146095276
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,float16,3,0.015029333531856537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,2,128,0,1,float16,float16,8191,0.10155733426411946
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,fp8,3,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,float16,7,0.01492799942692121
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,float16,15,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,fp8,15,0.013077333569526672
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,float16,31,0.014826666563749313
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,float16,255,0.014864000181357065
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,fp8,31,0.014901333798964819
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,float16,63,0.01481066644191742
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,fp8,63,0.013967999567588171
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,float16,127,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,fp8,127,0.014789332946141561
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,fp8,255,0.013541333377361298
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,float16,511,0.019098666807015736
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,fp8,2047,0.04155733436346054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,fp8,511,0.017103999853134155
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,float16,1023,0.03533866753180822
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,fp8,1023,0.023007998863856
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,float16,2047,0.0598880002895991
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,float16,1,0.01882133384545644
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,fp8,1,0.018197332819302876
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,float16,4095,0.1019040048122406
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,fp8,3,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,fp8,8191,0.1053653359413147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,fp8,4095,0.06192533175150553
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,float16,3,0.01907733331123988
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,4,128,0,1,float16,float16,8191,0.18938666582107544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,float16,7,0.019248000035683315
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,fp8,7,0.018618666877349217
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,float16,15,0.01929066702723503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,fp8,15,0.01926933353145917
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,float16,127,0.01932799940307935
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,float16,31,0.01932799940307935
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,fp8,31,0.017312000195185345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,float16,63,0.019381333142518997
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,fp8,63,0.017242666333913803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,fp8,127,0.017909333109855652
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,float16,255,0.019258666783571243
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,fp8,255,0.01706133286158244
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,float16,511,0.03352533280849457
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,fp8,511,0.023434666295846302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,float16,1023,0.05365866422653198
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,fp8,1023,0.036229332288106285
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,float16,4095,0.17324266831080118
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,float16,2047,0.09630399942398071
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,fp8,2047,0.059936001896858215
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,fp8,4095,0.09680533409118652
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,128,0,1,float16,fp8,1,0.013232000172138214
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,128,0,1,float16,float16,1,0.012917333592971167
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,float16,8191,0.3291040062904358
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,16,8,128,0,1,float16,fp8,8191,0.17382399241129556
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,128,0,1,float16,float16,3,0.013882666826248169
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,128,0,1,float16,fp8,3,0.01310933381319046
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,128,0,1,float16,fp8,15,0.013002666334311167
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,128,0,1,float16,float16,7,0.012714666624863943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,128,0,1,float16,fp8,7,0.012810666114091873
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,128,0,1,float16,fp8,63,0.012837332983811697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,128,0,1,float16,float16,15,0.012928000340859095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,128,0,1,float16,float16,31,0.013882666826248169
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,128,0,1,float16,fp8,31,0.013088000317414602
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,128,0,1,float16,float16,63,0.013199999928474426
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,128,0,1,float16,float16,127,0.013584000368913015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,128,0,1,float16,fp8,127,0.012863999853531519
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,128,0,1,float16,float16,255,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,128,0,1,float16,fp8,255,0.013221333424250284
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,128,0,1,float16,float16,511,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,128,0,1,float16,fp8,511,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,128,0,1,float16,float16,1,0.015050667027632395
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,128,0,1,float16,float16,1023,0.0201706662774086
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,128,0,1,float16,fp8,1023,0.018330667167901993
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,128,0,1,float16,float16,2047,0.04070399949947993
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,1,128,0,1,float16,fp8,2047,0.02754133443037669
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,128,0,1,float16,fp8,1,0.014378666877746582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,128,0,1,float16,float16,3,0.015034666905800501
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,128,0,1,float16,fp8,3,0.014671999961137772
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,128,0,1,float16,float16,7,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,128,0,1,float16,fp8,7,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,128,0,1,float16,float16,15,0.015013333410024643
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,128,0,1,float16,fp8,15,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,128,0,1,float16,float16,127,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,128,0,1,float16,float16,31,0.014901333798964819
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,128,0,1,float16,fp8,31,0.014805333067973455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,128,0,1,float16,float16,63,0.014885333677132925
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,128,0,1,float16,fp8,63,0.01443733274936676
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,128,0,1,float16,fp8,511,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,128,0,1,float16,fp8,127,0.01479999969402949
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,128,0,1,float16,float16,255,0.015253332753976187
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,128,0,1,float16,fp8,255,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,128,0,1,float16,float16,511,0.018944000204404194
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,128,0,1,float16,float16,1023,0.03454400102297465
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,128,0,1,float16,fp8,1023,0.023413332800070446
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,128,0,1,float16,float16,2047,0.06162666777769724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,128,0,1,float16,float16,1,0.01932799940307935
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,2,128,0,1,float16,fp8,2047,0.04155733436346054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,128,0,1,float16,float16,3,0.021013334393501282
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,128,0,1,float16,float16,15,0.019215999792019527
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,128,0,1,float16,fp8,1,0.01950399950146675
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,128,0,1,float16,fp8,3,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,128,0,1,float16,float16,7,0.01918399954835574
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,128,0,1,float16,fp8,7,0.01725333308180173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,128,0,1,float16,fp8,15,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,128,0,1,float16,fp8,63,0.019402666638294857
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,128,0,1,float16,float16,127,0.01923199991385142
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,128,0,1,float16,float16,31,0.02107200026512146
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,128,0,1,float16,fp8,31,0.018917333334684372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,128,0,1,float16,fp8,255,0.017456000049908955
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,128,0,1,float16,float16,63,0.01916266605257988
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,128,0,1,float16,float16,511,0.034789333740870156
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,128,0,1,float16,fp8,127,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,128,0,1,float16,fp8,1023,0.035642666121323906
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,128,0,1,float16,float16,255,0.020288000504175823
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,128,0,1,float16,fp8,511,0.023269332945346832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,128,0,1,float16,float16,1023,0.053743998209635414
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,128,0,1,float16,float16,2047,0.09668266773223877
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,4,128,0,1,float16,fp8,2047,0.05996266504128774
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,128,0,1,float16,fp8,1,0.025392000873883564
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,128,0,1,float16,float16,1,0.02992533395687739
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,128,0,1,float16,float16,3,0.029818666477998097
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,128,0,1,float16,fp8,3,0.02717866748571396
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,128,0,1,float16,float16,7,0.02921066681543986
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,128,0,1,float16,fp8,7,0.027258666853109997
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,128,0,1,float16,float16,15,0.029317334294319153
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,128,0,1,float16,fp8,15,0.02722666660944621
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,128,0,1,float16,float16,31,0.02938133229811986
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,128,0,1,float16,float16,63,0.029530666768550873
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,128,0,1,float16,fp8,31,0.026474667092164356
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,128,0,1,float16,fp8,63,0.027077332139015198
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,128,0,1,float16,float16,127,0.029493334392706554
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,128,0,1,float16,fp8,127,0.02714666724205017
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,128,0,1,float16,float16,255,0.03315199911594391
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,128,0,1,float16,fp8,255,0.027029333015282948
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,128,0,1,float16,float16,511,0.05401599903901418
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,128,0,1,float16,float16,1023,0.09291733304659526
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,128,0,1,float16,fp8,511,0.040106666584809623
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,128,0,1,float16,fp8,1023,0.057775999108950295
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,128,0,1,float16,float16,2047,0.1744640072186788
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,16,8,128,0,1,float16,fp8,2047,0.10220266381899516
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,128,0,1,float16,float16,1,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,128,0,1,float16,fp8,1,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,128,0,1,float16,float16,3,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,128,0,1,float16,fp8,3,0.01703466723362605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,128,0,1,float16,float16,7,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,128,0,1,float16,fp8,7,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,128,0,1,float16,float16,15,0.017024000485738117
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,128,0,1,float16,fp8,15,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,128,0,1,float16,float16,31,0.017231999586025875
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,128,0,1,float16,fp8,31,0.017071999609470367
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,128,0,1,float16,float16,63,0.017162666966517765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,128,0,1,float16,fp8,63,0.016735999534527462
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,128,0,1,float16,float16,255,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,128,0,1,float16,float16,127,0.017317333569129307
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,128,0,1,float16,fp8,127,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,128,0,1,float16,fp8,255,0.016805333395799
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,128,0,1,float16,float16,511,0.023024000227451324
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,128,0,1,float16,fp8,511,0.02091199904680252
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,128,0,1,float16,float16,1023,0.03714666763941447
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,128,0,1,float16,float16,1,0.021157334248224895
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,1,128,0,1,float16,fp8,1023,0.027242665489514668
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,128,0,1,float16,fp8,3,0.01929066702723503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,128,0,1,float16,fp8,1,0.019679999599854153
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,128,0,1,float16,float16,3,0.020975999534130096
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,128,0,1,float16,float16,7,0.021029333273569744
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,128,0,1,float16,fp8,7,0.019173332800467808
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,128,0,1,float16,float16,15,0.02109866589307785
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,128,0,1,float16,fp8,15,0.018917333334684372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,128,0,1,float16,float16,31,0.021045332153638203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,128,0,1,float16,fp8,31,0.01926400015751521
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,128,0,1,float16,fp8,63,0.01884799947341283
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,128,0,1,float16,float16,63,0.021242665747801464
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,128,0,1,float16,float16,127,0.021183999876181286
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,128,0,1,float16,fp8,127,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,128,0,1,float16,float16,255,0.021087999145189922
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,128,0,1,float16,fp8,511,0.025311999022960663
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,128,0,1,float16,fp8,255,0.01923199991385142
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,128,0,1,float16,float16,511,0.034272000193595886
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,128,0,1,float16,fp8,1,0.027104000250498455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,128,0,1,float16,float16,1023,0.05406400064627329
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,128,0,1,float16,float16,1,0.031258667508761086
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,2,128,0,1,float16,fp8,1023,0.03738666574160258
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,128,0,1,float16,float16,3,0.029850666721661884
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,128,0,1,float16,fp8,3,0.027077332139015198
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,128,0,1,float16,fp8,15,0.02720000098148982
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,128,0,1,float16,float16,7,0.031258667508761086
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,128,0,1,float16,fp8,7,0.027056001126766205
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,128,0,1,float16,fp8,63,0.02722666660944621
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,128,0,1,float16,float16,15,0.031045332551002502
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,128,0,1,float16,fp8,127,0.02722666660944621
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,128,0,1,float16,float16,31,0.031343999008337654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,128,0,1,float16,fp8,31,0.027749332288901012
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,128,0,1,float16,float16,63,0.031370667119820915
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,128,0,1,float16,float16,127,0.029103999336560566
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,128,0,1,float16,float16,255,0.033471999069054924
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,128,0,1,float16,fp8,255,0.027189334233601887
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,128,0,1,float16,float16,511,0.05588266750176748
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,128,0,1,float16,fp8,511,0.0400693342089653
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,128,0,1,float16,float16,1023,0.09470400214195251
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,128,0,1,float16,float16,1,0.0499946673711141
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,128,0,1,float16,fp8,3,0.04365866879622141
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,4,128,0,1,float16,fp8,1023,0.06003733476003011
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,128,0,1,float16,fp8,7,0.04377600053946177
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,128,0,1,float16,fp8,1,0.04349866509437561
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,128,0,1,float16,float16,3,0.049770668148994446
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,128,0,1,float16,float16,7,0.04983466863632202
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,128,0,1,float16,float16,15,0.049973333875338234
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,128,0,1,float16,fp8,15,0.04358399907747904
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,128,0,1,float16,float16,31,0.05018133421738943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,128,0,1,float16,float16,63,0.050000001986821495
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,128,0,1,float16,fp8,31,0.04381333291530609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,128,0,1,float16,fp8,63,0.043493335445721946
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,128,0,1,float16,fp8,255,0.04555733501911163
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,128,0,1,float16,float16,127,0.050928001602490745
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,128,0,1,float16,fp8,127,0.04354133208592733
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,128,0,1,float16,float16,255,0.05399466554323832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,128,0,1,float16,fp8,511,0.06612800061702728
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,float16,1,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,128,0,1,float16,float16,511,0.09533333778381348
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,fp8,1,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,fp8,7,0.010885333021481832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,float16,3,0.010618666807810465
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,128,0,1,float16,fp8,1023,0.10415466626485188
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,16,8,128,0,1,float16,float16,1023,0.17287999391555786
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,fp8,3,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,float16,7,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,float16,15,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,fp8,15,0.01097600037852923
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,float16,31,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,fp8,31,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,float16,63,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,fp8,63,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,float16,127,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,fp8,127,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,float16,255,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,fp8,255,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,float16,511,0.010933333386977514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,fp8,511,0.011055999745925268
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,float16,1023,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,fp8,1023,0.010933333386977514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,float16,2047,0.013013333082199097
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,fp8,8191,0.018981333822011948
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,fp8,2047,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,float16,4095,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,fp8,4095,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,float16,8191,0.019109333554903667
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,float16,16383,0.02109866589307785
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,fp8,16383,0.02029866725206375
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,float16,32767,0.02603200078010559
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,fp8,32767,0.02421333392461141
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,float16,1,0.009152000149091085
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,fp8,1,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,float16,3,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,fp8,7,0.010954666882753372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,float16,7,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,fp8,3,0.009018666421373686
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,fp8,65535,0.029535998900731403
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,float16,65535,0.041936000188191734
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,float16,15,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,fp8,15,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,float16,31,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,fp8,31,0.009450666606426239
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,float16,63,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,float16,255,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,float16,131071,0.06563733518123627
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,fp8,63,0.009679999823371569
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,float16,127,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,1,128,0,1,float16,fp8,131071,0.046298667788505554
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,fp8,127,0.01062400018175443
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,fp8,255,0.009423999736706415
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,float16,511,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,fp8,511,0.011221333096424738
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,float16,1023,0.01101333275437355
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,fp8,1023,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,float16,2047,0.01526933287580808
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,fp8,2047,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,float16,4095,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,fp8,4095,0.01704000060757001
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,float16,8191,0.019167999426523846
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,fp8,8191,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,float16,16383,0.023024000227451324
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,fp8,16383,0.020970667401949566
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,float16,32767,0.04055466751257578
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,fp8,32767,0.025424001117547352
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,float16,1,0.008992000172535578
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,fp8,1,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,float16,65535,0.06248533229033152
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,float16,3,0.009082666908701261
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,fp8,3,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,fp8,65535,0.039674667020638786
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,float16,7,0.009119999905427298
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,float16,31,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,fp8,7,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,float16,15,0.010512000570694605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,fp8,15,0.009775999933481216
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,float16,131071,0.10079999764760335
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,fp8,127,0.010618666807810465
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,float16,63,0.00892800030608972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,fp8,31,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,fp8,63,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,float16,127,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,2,128,0,1,float16,fp8,131071,0.06190933287143707
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,float16,255,0.009103999783595404
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,fp8,255,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,float16,511,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,fp8,511,0.011055999745925268
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,float16,1023,0.011973333855470022
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,fp8,1023,0.010911999891201654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,float16,2047,0.015002666662136713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,fp8,2047,0.014943999548753103
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,float16,4095,0.017290666699409485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,fp8,4095,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,float16,8191,0.021189334491888683
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,fp8,8191,0.018895999838908512
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,float16,16383,0.03822933385769526
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,fp8,16383,0.02497066557407379
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,float16,32767,0.0584853341182073
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,fp8,32767,0.039647998909155525
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,float16,1,0.010949333508809408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,float16,3,0.009093333035707474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,fp8,1,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,float16,7,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,float16,65535,0.10351999600728352
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,fp8,3,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,fp8,65535,0.06020266811052958
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,fp8,7,0.009690666571259499
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,float16,15,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,float16,63,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,fp8,15,0.01097600037852923
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,float16,31,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,fp8,31,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,fp8,63,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,float16,127,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,float16,131071,0.18588266770044962
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,fp8,127,0.01089599976936976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,4,128,0,1,float16,fp8,131071,0.10706667105356853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,float16,255,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,fp8,255,0.011898666620254517
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,float16,511,0.01108266661564509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,float16,2047,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,fp8,511,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,float16,1023,0.012901333471139273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,float16,8191,0.03905066599448522
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,fp8,1023,0.012800000607967377
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,fp8,2047,0.01634666696190834
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,float16,4095,0.021013334393501282
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,fp8,4095,0.01894933357834816
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,fp8,8191,0.024197332561016083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,float16,16383,0.0582826683918635
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,fp8,16383,0.03962666789690653
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,float16,32767,0.10050132870674133
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,fp8,32767,0.062181333700815834
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,128,0,1,float16,float16,1,0.02475200096766154
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,float16,65535,0.185808002948761
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,128,0,1,float16,fp8,1,0.023071999351183575
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,fp8,65535,0.10518399874369304
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,128,0,1,float16,float16,3,0.025290665527184803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,128,0,1,float16,fp8,3,0.022965334355831146
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,128,0,1,float16,float16,7,0.025231999655564625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,128,0,1,float16,fp8,7,0.023397333920001984
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,fp8,131071,0.19337600469589233
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,16,8,128,0,1,float16,float16,131071,0.35541868209838867
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,128,0,1,float16,float16,15,0.025205334027608235
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,128,0,1,float16,float16,31,0.024112001061439514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,128,0,1,float16,fp8,31,0.023413332800070446
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,128,0,1,float16,float16,63,0.025050667424996693
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,128,0,1,float16,fp8,15,0.023397333920001984
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,128,0,1,float16,fp8,63,0.023386667172114056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,128,0,1,float16,float16,127,0.02465066562096278
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,128,0,1,float16,fp8,127,0.023434666295846302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,128,0,1,float16,float16,255,0.023247999449570973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,128,0,1,float16,float16,511,0.03766400118668874
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,128,0,1,float16,fp8,511,0.031231999397277832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,128,0,1,float16,fp8,1,0.029333333174387615
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,128,0,1,float16,float16,3,0.033333333830038704
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,1,128,0,1,float16,fp8,255,0.023621333142121632
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,128,0,1,float16,fp8,3,0.02926933268706004
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,128,0,1,float16,float16,1,0.03161599983771642
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,128,0,1,float16,float16,7,0.03125333289305369
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,128,0,1,float16,fp8,7,0.029130667448043823
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,128,0,1,float16,float16,15,0.03325333446264267
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,128,0,1,float16,fp8,15,0.029189333319664
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,128,0,1,float16,float16,31,0.03335466732581457
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,128,0,1,float16,float16,63,0.033258666594823204
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,128,0,1,float16,fp8,31,0.029839999973773956
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,128,0,1,float16,float16,127,0.031471999982992806
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,128,0,1,float16,fp8,127,0.029296000798543293
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,128,0,1,float16,fp8,63,0.029546665648619335
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,128,0,1,float16,float16,255,0.037290667494138084
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,128,0,1,float16,fp8,255,0.029370665550231934
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,128,0,1,float16,float16,511,0.05841066439946493
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,2,128,0,1,float16,fp8,511,0.04192533095677694
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,128,0,1,float16,fp8,1,0.04586133360862732
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,128,0,1,float16,float16,3,0.05170666674772898
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,128,0,1,float16,float16,1,0.05203199883302053
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,128,0,1,float16,float16,7,0.05211733281612396
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,128,0,1,float16,fp8,3,0.04569066564242045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,128,0,1,float16,fp8,7,0.04554133117198944
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,128,0,1,float16,float16,15,0.05195199946562449
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,128,0,1,float16,fp8,15,0.045824001232783
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,128,0,1,float16,float16,31,0.05177066723505656
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,128,0,1,float16,fp8,31,0.045610666275024414
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,128,0,1,float16,float16,63,0.0518506666024526
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,128,0,1,float16,fp8,63,0.04553066690762838
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,128,0,1,float16,float16,127,0.053472002347310386
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,128,0,1,float16,fp8,127,0.04541866481304169
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,128,0,1,float16,float16,255,0.05670933425426483
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,128,0,1,float16,fp8,255,0.04784533381462097
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,128,0,1,float16,float16,511,0.09896533687909444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,4,128,0,1,float16,fp8,511,0.0682826687892278
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,128,0,1,float16,float16,1,0.09098133444786072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,128,0,1,float16,fp8,1,0.07685866455237071
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,128,0,1,float16,float16,3,0.09099733829498291
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,128,0,1,float16,fp8,3,0.07691733539104462
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,128,0,1,float16,float16,7,0.09098133444786072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,128,0,1,float16,fp8,7,0.07670933504899342
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,128,0,1,float16,float16,15,0.09120532870292664
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,128,0,1,float16,fp8,15,0.07796266674995422
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,128,0,1,float16,float16,31,0.0909493366877238
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,128,0,1,float16,fp8,31,0.07696533203125
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,128,0,1,float16,float16,63,0.09130666653315227
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,128,0,1,float16,fp8,63,0.07685333490371704
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,128,0,1,float16,float16,127,0.09062400460243225
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,128,0,1,float16,fp8,127,0.07855466504891713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,128,0,1,float16,float16,255,0.0976586639881134
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,128,0,1,float16,fp8,255,0.07869333525498708
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,128,0,1,float16,fp8,511,0.12140799562136333
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,16,8,128,0,1,float16,float16,511,0.1765706737836202
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,1,128,0,1,float16,float16,1,0.039733332892258964
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,1,128,0,1,float16,float16,3,0.0397173340121905
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,1,128,0,1,float16,fp8,3,0.03774400055408478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,1,128,0,1,float16,float16,7,0.0395359992980957
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,1,128,0,1,float16,fp8,7,0.03732266773780187
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,1,128,0,1,float16,float16,15,0.039520000418027244
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,1,128,0,1,float16,fp8,15,0.0377866675456365
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,1,128,0,1,float16,float16,31,0.03941866755485535
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,1,128,0,1,float16,fp8,31,0.03749333322048187
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,1,128,0,1,float16,float16,63,0.03969600051641464
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,1,128,0,1,float16,fp8,63,0.03772266705830892
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,1,128,0,1,float16,float16,127,0.040474665661652885
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,1,128,0,1,float16,fp8,127,0.03775999943415324
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,1,128,0,1,float16,float16,255,0.04353066782156626
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,2,128,0,1,float16,float16,1,0.056074668963750206
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,1,128,0,1,float16,fp8,255,0.03811199963092804
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,2,128,0,1,float16,fp8,1,0.0497920016447703
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,2,128,0,1,float16,float16,3,0.05579199890295664
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,2,128,0,1,float16,fp8,3,0.04974400003751119
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,2,128,0,1,float16,float16,7,0.05586666862169901
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,2,128,0,1,float16,fp8,7,0.04966400067011515
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,2,128,0,1,float16,float16,15,0.05574933191140493
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,2,128,0,1,float16,fp8,15,0.04971200227737427
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,2,128,0,1,float16,float16,31,0.05682133138179779
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,2,128,0,1,float16,fp8,31,0.04956266780694326
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,1,128,0,1,float16,fp8,1,0.03755733370780945
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,2,128,0,1,float16,float16,63,0.05601066847642263
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,2,128,0,1,float16,fp8,63,0.04990933338801066
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,2,128,0,1,float16,float16,127,0.05725333094596863
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,2,128,0,1,float16,fp8,127,0.049653331438700356
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,2,128,0,1,float16,float16,255,0.0621973325808843
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,4,128,0,1,float16,float16,1,0.09527466694513957
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,4,128,0,1,float16,fp8,1,0.08229866623878479
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,2,128,0,1,float16,fp8,255,0.051813334226608276
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,4,128,0,1,float16,float16,3,0.0950986643632253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,4,128,0,1,float16,fp8,3,0.08272533118724823
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,4,128,0,1,float16,float16,7,0.09546666344006856
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,4,128,0,1,float16,fp8,7,0.08241066833337148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,4,128,0,1,float16,float16,15,0.09475200374921162
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,4,128,0,1,float16,fp8,15,0.0827893316745758
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,4,128,0,1,float16,float16,31,0.09490666786829631
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,4,128,0,1,float16,fp8,31,0.08246933420499165
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,4,128,0,1,float16,float16,63,0.09504533807436626
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,4,128,0,1,float16,fp8,63,0.0824480007092158
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,4,128,0,1,float16,float16,127,0.09507200121879578
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,4,128,0,1,float16,fp8,127,0.08303999900817871
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,4,128,0,1,float16,float16,255,0.1029813289642334
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,4,128,0,1,float16,fp8,255,0.08616532882054646
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,8,128,0,1,float16,float16,1,0.17086933056513467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,8,128,0,1,float16,fp8,1,0.14667733510335287
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,8,128,0,1,float16,fp8,3,0.1478506624698639
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,8,128,0,1,float16,float16,3,0.17063466707865396
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,8,128,0,1,float16,float16,7,0.17086933056513467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,8,128,0,1,float16,fp8,7,0.14615466197331747
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,8,128,0,1,float16,float16,15,0.17053866386413574
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,8,128,0,1,float16,fp8,15,0.14709867040316263
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,8,128,0,1,float16,float16,31,0.1706719994544983
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,8,128,0,1,float16,fp8,31,0.14692800243695578
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,float16,1,0.01090666651725769
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,8,128,0,1,float16,float16,63,0.1707893411318461
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,fp8,1,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,8,128,0,1,float16,fp8,63,0.14705066879590353
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,float16,3,0.010911999891201654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,8,128,0,1,float16,fp8,127,0.1460586686929067
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,fp8,3,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,float16,7,0.011066666493813196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,fp8,15,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,8,128,0,1,float16,float16,127,0.1698346734046936
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,fp8,7,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,float16,15,0.010629333555698395
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,8,128,0,1,float16,float16,255,0.1828213334083557
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,float16,31,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,16,8,128,0,1,float16,fp8,255,0.14763733744621277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,fp8,31,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,float16,63,0.010245333115259806
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,fp8,63,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,float16,127,0.010656000425418219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,fp8,127,0.010575999816258749
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,fp8,255,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,float16,2047,0.01701333373785019
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,float16,255,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,float16,511,0.011066666493813196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,fp8,511,0.011343999455372492
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,float16,1023,0.010944000134865442
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,fp8,1023,0.010922666639089584
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,fp8,2047,0.015578666081031164
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,float16,4095,0.01851733277241389
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,fp8,4095,0.01728533332546552
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,float16,8191,0.019674666225910187
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,fp8,8191,0.019226666539907455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,float16,16383,0.02508266766866048
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,fp8,16383,0.022944000860055287
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,float16,32767,0.041519999504089355
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,float16,1,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,fp8,32767,0.027327999472618103
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,fp8,1,0.008949333180983862
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,float16,3,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,float16,15,0.009093333035707474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,fp8,3,0.010634666929642359
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,float16,7,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,fp8,7,0.010608000059922537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,float16,65535,0.06191466748714447
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,fp8,15,0.01098666712641716
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,1,128,0,1,float16,fp8,65535,0.04378666480382284
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,float16,31,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,fp8,31,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,float16,255,0.010362666721145311
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,float16,63,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,fp8,63,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,float16,127,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,fp8,127,0.010970667004585266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,fp8,255,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,float16,511,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,fp8,511,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,float16,1023,0.010922666639089584
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,fp8,1023,0.011503999431928
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,float16,2047,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,fp8,2047,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,float16,4095,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,fp8,4095,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,float16,8191,0.021365332106749218
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,fp8,8191,0.01897066707412402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,float16,16383,0.03941866755485535
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,fp8,16383,0.025216000775496166
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,float16,32767,0.06015466650327047
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,fp8,32767,0.039701332648595176
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,float16,3,0.010992000500361124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,float16,1,0.00960533320903778
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,fp8,1,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,fp8,3,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,float16,7,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,fp8,7,0.010981333752473196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,float16,65535,0.10230933626492818
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,float16,15,0.009733333562811216
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,fp8,15,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,2,128,0,1,float16,fp8,65535,0.06198933223883311
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,float16,31,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,fp8,31,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,float16,255,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,float16,63,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,fp8,63,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,float16,127,0.010645333677530289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,fp8,127,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,float16,2047,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,fp8,255,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,float16,511,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,fp8,511,0.01139733319481214
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,float16,1023,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,fp8,1023,0.012784000486135483
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,fp8,2047,0.01681600014368693
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,float16,4095,0.020992000897725422
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,fp8,4095,0.018917333334684372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,float16,8191,0.039488000174363456
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,fp8,8191,0.02516266703605652
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,float16,16383,0.06003733476003011
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,fp8,16383,0.039450667798519135
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,float16,1,0.01062400018175443
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,float16,32767,0.10044800241788228
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,fp8,3,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,fp8,32767,0.06196266909440359
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,fp8,1,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,float16,3,0.01089599976936976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,float16,7,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,fp8,7,0.011018666128317514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,fp8,31,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,float16,15,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,float16,65535,0.18525866667429605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,4,128,0,1,float16,fp8,65535,0.10507733623186748
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,fp8,15,0.01097600037852923
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,float16,31,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,float16,63,0.010650667051474253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,fp8,63,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,float16,127,0.010928000013033548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,float16,1023,0.012997332960367203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,fp8,127,0.011429333438475927
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,float16,255,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,fp8,255,0.010645333677530289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,float16,511,0.01240533341964086
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,fp8,511,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,fp8,1023,0.012784000486135483
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,float16,2047,0.020901332298914593
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,fp8,2047,0.0189280000825723
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,float16,4095,0.03809066613515218
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,float16,16383,0.09980266292889912
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,fp8,4095,0.023050665855407715
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,float16,8191,0.05804799993832906
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,fp8,8191,0.03897066662708918
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,fp8,16383,0.058837334314982094
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,float16,1,0.009098666409651438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,float16,32767,0.18385599056879678
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,float16,1,0.00895999992887179
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,fp8,32767,0.10194666186968486
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,fp8,1,0.010992000500361124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,fp8,1,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,fp8,3,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,float16,3,0.008992000172535578
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,float16,3,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,fp8,3,0.009472000102202097
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,fp8,7,0.009695999945203463
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,float16,65535,0.35062400499979657
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,16,8,128,0,1,float16,fp8,65535,0.1869493325551351
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,float16,7,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,float16,7,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,fp8,7,0.010640000303586325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,float16,15,0.009306666751702627
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,float16,15,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,fp8,15,0.010037333394090334
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,fp8,15,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,float16,31,0.008890666688481966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,float16,31,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,fp8,31,0.009695999945203463
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,fp8,31,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,float16,63,0.010656000425418219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,float16,63,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,fp8,63,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,fp8,63,0.010346666599313417
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,float16,127,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,float16,127,0.008954666554927826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,fp8,255,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,fp8,127,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,fp8,127,0.010965333630641302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,float16,255,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,float16,255,0.009632000078757605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,fp8,255,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,float16,1023,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,float16,1023,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,float16,511,0.009466666728258133
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,fp8,1023,0.010928000013033548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,float16,511,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,fp8,511,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,fp8,511,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,fp8,2047,0.01458666721979777
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,fp8,1023,0.011152000476916632
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,float16,2047,0.012725333372751871
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,float16,2047,0.014885333677132925
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,fp8,2047,0.01313599944114685
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,float16,4095,0.012858666479587555
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,float16,8191,0.013104000439246496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,float16,4095,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,fp8,4095,0.012629333883523941
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,fp8,8191,0.018885333091020584
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,fp8,4095,0.014970666418472925
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,float16,8191,0.01886933296918869
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,fp8,8191,0.012800000607967377
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,float16,16383,0.013125333935022354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,float16,16383,0.025386666258176167
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,fp8,16383,0.01267733300725619
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,fp8,16383,0.02311466634273529
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,float16,32767,0.04140799989302953
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,float16,32767,0.0129120002190272
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,fp8,32767,0.012789333860079447
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,float16,1,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,fp8,32767,0.031914666295051575
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,float16,1,0.009018666421373686
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,fp8,1,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,fp8,1,0.010213333492477735
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,float16,3,0.010431999961535135
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,float16,3,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,fp8,3,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,float16,65535,0.013082666943470636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,fp8,3,0.010373333469033241
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,float16,7,0.010026666646202406
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,float16,7,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,float16,65535,0.06425066788991292
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,64,128,1,float16,fp8,65535,0.012997332960367203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,fp8,7,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,64,0,1,float16,fp8,65535,0.054474666714668274
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,fp8,7,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,float16,31,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,float16,15,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,float16,15,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,fp8,15,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,fp8,15,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,float16,31,0.009002666920423508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,fp8,31,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,fp8,31,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,float16,63,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,float16,63,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,fp8,63,0.010890666395425797
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,fp8,63,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,float16,127,0.008938666433095932
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,float16,127,0.009488000224033991
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,fp8,127,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,fp8,127,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,float16,255,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,fp8,511,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,float16,255,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,fp8,255,0.010650667051474253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,fp8,255,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,float16,511,0.009610666582981745
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,float16,511,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,fp8,511,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,float16,1023,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,float16,1023,0.012304000556468964
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,fp8,1023,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,fp8,1023,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,float16,2047,0.012890666723251343
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,float16,2047,0.01523200049996376
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,fp8,2047,0.012778667112191519
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,fp8,2047,0.014912000546852747
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,float16,4095,0.01312000056107839
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,float16,4095,0.01932266727089882
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,fp8,4095,0.01313599944114685
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,fp8,4095,0.017071999609470367
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,float16,8191,0.01292266696691513
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,float16,8191,0.02473066747188568
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,fp8,8191,0.013093333691358566
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,fp8,8191,0.023050665855407715
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,float16,16383,0.012805332740147909
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,float16,16383,0.039861333866914116
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,fp8,16383,0.012746666868527731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,fp8,16383,0.032314665615558624
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,float16,32767,0.012773333738247553
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,float16,32767,0.06398400167624156
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,fp8,32767,0.013178666432698568
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,fp8,32767,0.05409599840641022
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,float16,1,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,float16,1,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,fp8,1,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,fp8,1,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,float16,3,0.010527999450763067
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,float16,3,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,fp8,3,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,fp8,3,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,float16,7,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,float16,7,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,fp8,7,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,float16,65535,0.012917333592971167
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,float16,65535,0.10986133416493733
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,fp8,7,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,float16,15,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,float16,15,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,64,128,1,float16,fp8,65535,0.012917333592971167
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,64,0,1,float16,fp8,65535,0.09077866872151692
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,fp8,15,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,fp8,15,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,float16,31,0.010634666929642359
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,float16,31,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,fp8,31,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,fp8,31,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,float16,63,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,float16,127,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,fp8,127,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,float16,63,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,fp8,63,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,fp8,63,0.011055999745925268
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,float16,127,0.010421333213647207
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,fp8,127,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,float16,255,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,float16,255,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,fp8,255,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,fp8,255,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,float16,511,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,float16,511,0.011061333119869232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,fp8,511,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,fp8,511,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,float16,1023,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,float16,1023,0.012815999488035837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,fp8,1023,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,fp8,1023,0.012821332861979803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,float16,2047,0.012810666114091873
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,float16,2047,0.01724799970785777
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,fp8,2047,0.012917333592971167
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,fp8,2047,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,float16,8191,0.012896000097195307
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,float16,4095,0.012784000486135483
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,float16,4095,0.023376000424226124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,fp8,4095,0.012997332960367203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,fp8,4095,0.02223466585079829
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,float16,8191,0.039450667798519135
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,fp8,8191,0.013066666821638743
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,fp8,8191,0.03156266609827677
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,float16,16383,0.013013333082199097
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,fp8,16383,0.05384000142415365
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,float16,16383,0.060880000392595925
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,fp8,16383,0.013114667187134424
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,float16,32767,0.013082666943470636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,fp8,32767,0.012938667088747025
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,float16,32767,0.10617599884668986
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,float16,1,0.009072000160813332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,fp8,32767,0.09302933017412822
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,float16,1,0.009082666908701261
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,fp8,1,0.009039999917149544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,fp8,1,0.00903466654320558
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,float16,3,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,float16,3,0.009050666665037474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,fp8,3,0.00914666677514712
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,float16,65535,0.19507733980814615
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,fp8,3,0.009066666786869368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,float16,65535,0.012810666114091873
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,float16,7,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,64,128,1,float16,fp8,65535,0.012842666357755661
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,float16,7,0.008965333302815756
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,float16,15,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,fp8,7,0.008842666943868002
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,64,0,1,float16,fp8,65535,0.1688906749089559
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,fp8,7,0.008997333546479544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,float16,15,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,fp8,15,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,fp8,15,0.00884799969693025
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,float16,31,0.008949333180983862
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,float16,31,0.009418666362762451
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,fp8,31,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,fp8,31,0.008992000172535578
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,float16,63,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,float16,63,0.008703999842206636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,fp8,63,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,fp8,63,0.008992000172535578
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,float16,127,0.009674666449427605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,float16,127,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,fp8,127,0.008874666566650072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,fp8,127,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,float16,255,0.00898133342464765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,float16,255,0.01098666712641716
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,fp8,255,0.008874666566650072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,fp8,255,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,float16,511,0.008933333059151968
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,float16,511,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,fp8,511,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,fp8,511,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,float16,1023,0.010933333386977514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,float16,1023,0.008901333436369896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,fp8,1023,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,fp8,1023,0.01097600037852923
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,float16,4095,0.00978133330742518
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,float16,2047,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,float16,2047,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,fp8,2047,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,float16,8191,0.00919999989370505
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,fp8,2047,0.010933333386977514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,float16,4095,0.01109333336353302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,fp8,4095,0.011418666690587997
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,fp8,4095,0.011055999745925268
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,float16,8191,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,fp8,8191,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,fp8,8191,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,float16,16383,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,float16,16383,0.019296000401178997
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,fp8,16383,0.009648000200589498
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,fp8,16383,0.01916266605257988
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,float16,32767,0.010992000500361124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,float16,32767,0.025519999365011852
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,fp8,32767,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,fp8,32767,0.0271573339899381
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,float16,65535,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,float16,65535,0.031152000029881794
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,float16,1,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,fp8,65535,0.029477333029111225
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,fp8,65535,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,float16,1,0.010901333143313726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,fp8,1,0.010634666929642359
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,float16,3,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,fp8,1,0.010640000303586325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,float16,3,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,fp8,3,0.009839999799927076
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,fp8,3,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,float16,7,0.00895999992887179
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,float16,7,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,float16,131071,0.012794667234023413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,fp8,7,0.009343999748428663
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,float16,131071,0.03378133227427801
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,fp8,7,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,float16,15,0.008992000172535578
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,64,128,1,float16,fp8,131071,0.01179733375708262
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,64,0,1,float16,fp8,131071,0.033344000577926636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,float16,15,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,fp8,15,0.009770666559537252
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,fp8,15,0.008938666433095932
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,float16,31,0.008858666444818178
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,float16,31,0.008943999807039896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,fp8,31,0.00960533320903778
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,fp8,31,0.009685333197315535
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,float16,63,0.009082666908701261
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,float16,63,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,fp8,127,0.00890666681031386
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,fp8,63,0.00933333362142245
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,fp8,63,0.008954666554927826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,float16,127,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,fp8,255,0.009530666594703993
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,float16,127,0.009797333429257074
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,fp8,127,0.009919999788204828
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,float16,255,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,float16,255,0.009061333412925402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,fp8,255,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,float16,511,0.00898133342464765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,float16,511,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,fp8,511,0.010480000327030817
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,fp8,511,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,float16,1023,0.009690666571259499
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,float16,1023,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,fp8,1023,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,float16,4095,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,fp8,1023,0.009898666913310686
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,fp8,4095,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,float16,2047,0.009072000160813332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,float16,2047,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,fp8,2047,0.010949333508809408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,fp8,2047,0.010911999891201654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,float16,4095,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,fp8,4095,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,float16,16383,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,float16,16383,0.018863999595244724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,float16,8191,0.008938666433095932
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,float16,8191,0.015418666104475657
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,fp8,8191,0.00878399983048439
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,fp8,8191,0.014965333044528961
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,fp8,16383,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,float16,32767,0.02117866774400075
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,fp8,16383,0.01924266666173935
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,float16,32767,0.00966933307548364
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,fp8,32767,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,fp8,32767,0.02128000060717265
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,float16,65535,0.009205333267649015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,fp8,65535,0.008938666433095932
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,fp8,65535,0.022101332743962605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,float16,65535,0.025589334468046825
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,float16,1,0.008901333436369896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,float16,1,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,fp8,1,0.009130666653315226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,fp8,1,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,float16,3,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,float16,3,0.008752000207702318
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,fp8,3,0.010144000252087912
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,float16,131071,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,fp8,3,0.010634666929642359
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,float16,7,0.008725333337982496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,fp8,131071,0.025402667621771496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,float16,7,0.00956266683836778
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,64,128,1,float16,fp8,131071,0.011146667102972666
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,64,0,1,float16,float16,131071,0.02752000093460083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,fp8,15,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,fp8,7,0.008869333192706108
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,fp8,7,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,float16,15,0.009045333291093508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,float16,15,0.00903466654320558
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,fp8,15,0.00871999996403853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,float16,31,0.009098666409651438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,float16,31,0.008874666566650072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,fp8,31,0.00890666681031386
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,fp8,31,0.009493333597977957
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,float16,63,0.009349333122372627
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,float16,63,0.008976000050703684
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,fp8,63,0.008912000184257826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,fp8,63,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,float16,127,0.008879999940594038
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,float16,127,0.009637333452701569
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,fp8,127,0.008912000184257826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,fp8,127,0.00949866697192192
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,float16,255,0.009455999980370203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,float16,255,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,fp8,255,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,fp8,255,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,float16,511,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,fp8,1023,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,float16,511,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,fp8,511,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,fp8,511,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,float16,1023,0.009098666409651438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,float16,1023,0.009450666606426239
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,float16,4095,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,fp8,1023,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,float16,2047,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,float16,2047,0.010901333143313726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,fp8,2047,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,fp8,2047,0.01091733326514562
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,float16,4095,0.011120000233252844
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,fp8,8191,0.01062400018175443
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,fp8,4095,0.009173333023985228
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,float16,16383,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,fp8,4095,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,float16,8191,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,fp8,16383,0.017279999951521557
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,float16,8191,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,fp8,8191,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,float16,16383,0.017173333714405697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,fp8,16383,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,float16,32767,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,float16,32767,0.019354666272799175
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,fp8,32767,0.010949333508809408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,fp8,32767,0.018976000448067982
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,float16,65535,0.01145600030819575
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,float16,65535,0.020938667158285778
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,fp8,65535,0.008879999940594038
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,float16,1,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,fp8,65535,0.021066665649414062
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,float16,1,0.008954666554927826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,fp8,1,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,float16,3,0.009136000027259191
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,fp8,1,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,float16,3,0.008986666798591614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,fp8,3,0.00898133342464765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,float16,131071,0.01098666712641716
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,fp8,3,0.009674666449427605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,fp8,131071,0.026869334280490875
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,64,0,1,float16,float16,131071,0.028805332879225414
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,float16,7,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,float16,7,0.008938666433095932
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,64,128,1,float16,fp8,131071,0.012975999464591345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,fp8,7,0.009002666920423508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,fp8,7,0.009136000027259191
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,float16,15,0.009039999917149544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,float16,15,0.009653333574533463
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,fp8,15,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,fp8,15,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,float16,31,0.009589333087205887
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,float16,31,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,fp8,31,0.009072000160813332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,fp8,63,0.009866666669646898
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,fp8,31,0.009066666786869368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,float16,63,0.009717333440979322
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,float16,63,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,fp8,63,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,float16,127,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,float16,127,0.009029333169261614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,fp8,127,0.009359999870260557
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,fp8,127,0.008943999807039896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,float16,255,0.009125333279371262
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,float16,255,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,fp8,255,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,fp8,255,0.010319999729593595
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,float16,511,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,float16,1023,0.010640000303586325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,fp8,1023,0.010645333677530289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,float16,511,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,fp8,511,0.009029333169261614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,fp8,511,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,float16,1023,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,fp8,1023,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,float16,2047,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,float16,2047,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,fp8,2047,0.010597333312034607
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,fp8,2047,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,float16,8191,0.011055999745925268
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,float16,4095,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,float16,4095,0.011440000186363855
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,fp8,4095,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,fp8,4095,0.012890666723251343
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,float16,8191,0.014864000181357065
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,float16,16383,0.019285333653291065
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,fp8,8191,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,fp8,8191,0.017162666966517765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,float16,16383,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,fp8,16383,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,fp8,16383,0.02109866589307785
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,fp8,32767,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,float16,32767,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,float16,32767,0.021386665602525074
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,fp8,32767,0.021349333226680756
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,float16,65535,0.010949333508809408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,float16,65535,0.023391999304294586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,float16,1,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,float16,1,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,fp8,65535,0.010922666639089584
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,fp8,1,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,fp8,65535,0.023226665953795116
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,fp8,1,0.009242666885256767
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,float16,3,0.009039999917149544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,float16,3,0.009082666908701261
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,fp8,3,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,float16,7,0.00903466654320558
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,fp8,3,0.010058666889866194
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,fp8,131071,0.012965332716703415
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,float16,7,0.009077333534757296
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,64,128,1,float16,float16,131071,0.012885333349307379
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,float16,15,0.010197333370645842
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,fp8,7,0.010069333637754122
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,fp8,15,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,fp8,7,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,float16,31,0.009093333035707474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,float16,131071,0.02734400083621343
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,float16,15,0.009050666665037474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,64,0,1,float16,fp8,131071,0.027109332382678986
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,fp8,15,0.010117333382368088
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,float16,31,0.009039999917149544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,fp8,31,0.010645333677530289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,fp8,31,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,float16,63,0.009039999917149544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,float16,63,0.008757333581646284
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,fp8,63,0.010485333700974783
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,fp8,63,0.008943999807039896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,float16,127,0.00914666677514712
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,float16,127,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,fp8,127,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,fp8,127,0.009119999905427298
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,float16,255,0.009365333244204521
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,float16,255,0.00890666681031386
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,fp8,255,0.009130666653315226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,fp8,255,0.008933333059151968
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,float16,511,0.009343999748428663
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,float16,511,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,fp8,1023,0.01062400018175443
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,fp8,511,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,fp8,511,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,float16,1023,0.00927466650803884
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,float16,1023,0.00972800018886725
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,fp8,1023,0.00879466657837232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,float16,2047,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,fp8,4095,0.010543999572594961
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,float16,2047,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,fp8,2047,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,fp8,2047,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,float16,4095,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,float16,4095,0.011146667102972666
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,fp8,4095,0.011055999745925268
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,float16,8191,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,float16,8191,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,fp8,8191,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,fp8,16383,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,fp8,8191,0.016751999656359356
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,float16,16383,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,float16,16383,0.018016000588734944
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,fp8,16383,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,float16,32767,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,fp8,32767,0.018965333700180054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,float16,32767,0.01931200052301089
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,fp8,32767,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,fp8,65535,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,float16,65535,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,float16,65535,0.021685334543387096
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,fp8,65535,0.021231998999913532
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,float16,1,0.008896000062425932
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,float16,1,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,fp8,1,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,fp8,1,0.009482666850090027
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,float16,3,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,float16,3,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,fp8,3,0.009216000015536943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,fp8,3,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,float16,7,0.010650667051474253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,float16,7,0.010117333382368088
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,float16,131071,0.01301866645614306
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,float16,131071,0.02961066613594691
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,fp8,7,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,fp8,7,0.010586666564146677
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,float16,15,0.010474666953086853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,64,128,1,float16,fp8,131071,0.011786667009194693
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,float16,15,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,fp8,15,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,fp8,31,0.009039999917149544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,fp8,15,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,64,0,1,float16,fp8,131071,0.027429332335789997
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,float16,31,0.010602666685978571
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,float16,31,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,fp8,31,0.009317333499590555
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,float16,63,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,float16,63,0.008837333569924036
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,fp8,63,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,fp8,63,0.009178666397929192
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,float16,127,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,float16,127,0.008890666688481966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,fp8,127,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,fp8,127,0.009413333609700203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,float16,255,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,float16,255,0.009039999917149544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,fp8,255,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,float16,1023,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,fp8,255,0.009130666653315226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,float16,511,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,float16,511,0.009989333028594652
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,fp8,511,0.009455999980370203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,fp8,511,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,float16,1023,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,fp8,1023,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,fp8,1023,0.011087999989589056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,float16,2047,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,float16,2047,0.011098666737476984
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,fp8,2047,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,fp8,2047,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,float16,4095,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,float16,4095,0.014746667196353277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,fp8,4095,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,fp8,4095,0.014117332796255747
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,float16,8191,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,float16,8191,0.016895999511082966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,fp8,8191,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,fp8,8191,0.01525866612792015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,float16,16383,0.01108266661564509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,float16,16383,0.017290666699409485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,fp8,16383,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,fp8,16383,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,float16,32767,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,float16,32767,0.020975999534130096
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,fp8,32767,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,fp8,32767,0.01905599981546402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,float16,65535,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,float16,65535,0.025386666258176167
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,fp8,65535,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,float16,1,0.01089599976936976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,fp8,65535,0.023376000424226124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,float16,1,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,fp8,1,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,fp8,1,0.010640000303586325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,float16,3,0.009685333197315535
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,float16,3,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,fp8,3,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,fp8,3,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,float16,7,0.010901333143313726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,float16,131071,0.04350399971008301
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,float16,7,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,fp8,131071,0.012655999511480331
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,fp8,7,0.010992000500361124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,64,0,1,float16,fp8,131071,0.03370666752258936
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,64,128,1,float16,float16,131071,0.012800000607967377
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,fp8,15,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,fp8,7,0.010527999450763067
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,float16,15,0.010490667074918747
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,float16,15,0.010378666842977205
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,fp8,15,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,float16,31,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,float16,63,0.010319999729593595
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,fp8,63,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,float16,31,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,float16,127,0.009994666402538618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,fp8,31,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,fp8,31,0.010928000013033548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,float16,63,0.009242666885256767
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,fp8,63,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,float16,127,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,fp8,127,0.010954666882753372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,fp8,127,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,float16,255,0.010645333677530289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,float16,255,0.009103999783595404
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,fp8,255,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,fp8,255,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,float16,511,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,float16,511,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,fp8,511,0.010938666760921478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,fp8,511,0.011183999478816986
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,float16,1023,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,float16,1023,0.011109333485364914
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,fp8,1023,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,fp8,1023,0.011087999989589056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,float16,2047,0.012997332960367203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,float16,2047,0.015290666371583939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,float16,4095,0.018992000569899876
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,fp8,2047,0.013178666432698568
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,fp8,2047,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,float16,4095,0.012890666723251343
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,fp8,4095,0.012879999975363413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,fp8,4095,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,fp8,8191,0.02333866556485494
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,float16,8191,0.012949333836634954
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,float16,8191,0.024933333198229473
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,fp8,8191,0.013034666577974955
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,float16,16383,0.012826666235923767
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,float16,16383,0.04060266663630804
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,fp8,16383,0.012746666868527731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,fp8,16383,0.03158933420976003
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,float16,1,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,float16,1,0.010426666587591171
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,fp8,1,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,fp8,1,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,float16,3,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,float16,32767,0.012815999488035837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,float16,32767,0.0644160012404124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,float16,3,0.010437333335479101
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,64,128,1,float16,fp8,32767,0.0129120002190272
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,fp8,3,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,fp8,7,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,fp8,3,0.010933333386977514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,float16,15,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,float16,15,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,64,0,1,float16,fp8,32767,0.05513600011666616
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,float16,7,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,float16,7,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,fp8,7,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,fp8,15,0.010640000303586325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,float16,63,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,fp8,15,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,float16,31,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,float16,31,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,fp8,31,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,fp8,31,0.011168000598748526
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,float16,63,0.01110400011142095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,fp8,63,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,fp8,63,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,float16,127,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,float16,127,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,fp8,127,0.011152000476916632
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,fp8,127,0.010928000013033548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,float16,511,0.011098666737476984
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,float16,255,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,fp8,511,0.01090666651725769
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,float16,255,0.01091733326514562
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,fp8,255,0.011941333611806234
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,fp8,255,0.011077333241701126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,float16,511,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,fp8,511,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,float16,1023,0.010890666395425797
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,float16,1023,0.012794667234023413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,fp8,1023,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,fp8,1023,0.012847999731699625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,float16,2047,0.012874666601419449
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,float16,2047,0.019013332823912304
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,fp8,2047,0.012784000486135483
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,fp8,2047,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,float16,4095,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,float16,4095,0.023152001202106476
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,fp8,4095,0.012928000340859095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,fp8,4095,0.02181866765022278
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,float16,8191,0.01293333371480306
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,float16,8191,0.03963200002908707
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,fp8,8191,0.013045333325862885
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,fp8,8191,0.033301333586374916
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,float16,16383,0.01310933381319046
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,float16,16383,0.06189866860707601
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,fp8,16383,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,fp8,16383,0.054058666030565895
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,float16,1,0.01146666705608368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,float16,1,0.011445333560307821
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,fp8,1,0.01108266661564509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,fp8,1,0.011391999820868174
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,float16,3,0.011519999553759893
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,float16,32767,0.013130666067202887
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,float16,3,0.01145600030819575
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,fp8,3,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,float16,32767,0.1053493320941925
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,64,128,1,float16,fp8,32767,0.01357866699496905
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,fp8,7,0.01180800050497055
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,fp8,3,0.01116266722480456
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,64,0,1,float16,fp8,32767,0.09201600154240926
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,float16,7,0.011802667131026586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,float16,7,0.011237333218256632
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,fp8,7,0.011071999867757162
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,float16,15,0.011370666325092316
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,float16,15,0.012106666962305704
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,fp8,15,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,fp8,15,0.012026666353146235
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,float16,31,0.011402666568756104
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,float16,31,0.011674666156371435
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,fp8,31,0.011365332951148352
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,fp8,31,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,float16,63,0.012181332955757776
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,float16,63,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,fp8,127,0.011706666400035223
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,fp8,63,0.012805332740147909
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,fp8,63,0.012896000097195307
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,float16,255,0.011343999455372492
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,float16,127,0.011381333072980246
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,fp8,255,0.012863999853531519
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,float16,127,0.011226666470368704
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,fp8,127,0.012367999802033106
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,float16,255,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,fp8,255,0.012853333105643591
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,float16,511,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,float16,511,0.013023999830087027
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,fp8,511,0.011178666104873022
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,fp8,511,0.013077333569526672
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,float16,1023,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,fp8,2047,0.01482133318980535
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,float16,1023,0.01613333324591319
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,fp8,1023,0.01116266722480456
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,fp8,1023,0.015562667200962702
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,float16,2047,0.014362666755914688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,fp8,4095,0.014746667196353277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,float16,2047,0.024357333779335022
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,fp8,2047,0.023082666099071503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,float16,4095,0.014981333166360855
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,float16,4095,0.03965333352486292
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,fp8,4095,0.03159466634194056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,float16,8191,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,float16,8191,0.062128002444903054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,fp8,8191,0.014864000181357065
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,fp8,8191,0.05479466418425242
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,float16,16383,0.01482133318980535
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,float16,16383,0.10714667042096455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,fp8,16383,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,float16,1,0.009765333185593287
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,fp8,16383,0.09305066863695781
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,fp8,1,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,float16,1,0.009173333023985228
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,fp8,1,0.010464000205198923
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,float16,32767,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,float16,3,0.010901333143313726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,float16,3,0.00903466654320558
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,float16,32767,0.1972106695175171
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,fp8,3,0.01062400018175443
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,64,128,1,float16,fp8,32767,0.014698666830857595
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,fp8,3,0.008853333070874214
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,64,0,1,float16,fp8,32767,0.16875733931859335
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,float16,7,0.010640000303586325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,float16,7,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,fp8,7,0.01090666651725769
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,fp8,7,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,float16,15,0.009050666665037474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,float16,15,0.009050666665037474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,fp8,15,0.00890666681031386
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,fp8,15,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,float16,31,0.00978133330742518
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,float16,31,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,fp8,31,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,fp8,31,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,float16,63,0.00895999992887179
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,float16,63,0.008997333546479544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,fp8,63,0.008965333302815756
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,fp8,63,0.009599999835093817
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,float16,127,0.008863999818762144
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,float16,127,0.009119999905427298
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,fp8,127,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,fp8,127,0.01091733326514562
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,float16,255,0.009072000160813332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,float16,255,0.009189333145817121
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,fp8,255,0.008736000085870424
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,fp8,255,0.009749333063761393
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,float16,511,0.009893333539366722
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,float16,511,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,fp8,511,0.010922666639089584
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,fp8,511,0.010645333677530289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,float16,2047,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,float16,1023,0.008986666798591614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,float16,1023,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,fp8,1023,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,fp8,1023,0.011018666128317514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,float16,2047,0.01181866725285848
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,fp8,2047,0.01257066677014033
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,fp8,2047,0.012778667112191519
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,float16,8191,0.01110400011142095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,float16,4095,0.012714666624863943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,float16,4095,0.01302933320403099
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,fp8,4095,0.012757333616415659
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,fp8,4095,0.013002666334311167
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,float16,8191,0.0170666662355264
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,fp8,8191,0.012821332861979803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,fp8,8191,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,float16,16383,0.012949333836634954
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,float16,16383,0.019152000546455383
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,fp8,16383,0.012885333349307379
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,fp8,16383,0.019306667149066925
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,fp8,32767,0.011114666859308878
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,float16,32767,0.012730666746695837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,float16,32767,0.021130666136741638
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,fp8,32767,0.020992000897725422
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,float16,65535,0.011973333855470022
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,float16,65535,0.023584000766277313
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,fp8,65535,0.011205332974592844
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,float16,1,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,float16,1,0.010944000134865442
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,fp8,65535,0.023056000471115112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,float16,3,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,fp8,1,0.010645333677530289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,fp8,1,0.009018666421373686
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,float16,3,0.00973866693675518
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,fp8,3,0.011893333246310553
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,float16,131071,0.013210666676362356
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,fp8,3,0.00915733352303505
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,fp8,131071,0.029189333319664
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,float16,7,0.008896000062425932
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,64,128,1,float16,fp8,131071,0.013237333546082178
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,float16,7,0.010101333260536194
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,64,0,1,float16,float16,131071,0.02941333254178365
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,fp8,7,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,fp8,7,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,float16,15,0.008874666566650072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,float16,15,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,fp8,15,0.00921066664159298
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,float16,63,0.008762666955590248
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,fp8,15,0.010527999450763067
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,float16,31,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,float16,31,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,fp8,31,0.009695999945203463
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,fp8,31,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,float16,63,0.009093333035707474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,fp8,63,0.00891733355820179
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,fp8,63,0.009050666665037474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,float16,127,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,float16,127,0.008992000172535578
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,fp8,127,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,float16,511,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,fp8,127,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,float16,255,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,float16,255,0.008901333436369896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,fp8,255,0.010037333394090334
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,fp8,255,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,float16,511,0.009626666704813639
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,fp8,511,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,fp8,511,0.01109333336353302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,float16,1023,0.009109333157539368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,float16,1023,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,fp8,1023,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,fp8,1023,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,float16,2047,0.012794667234023413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,float16,2047,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,fp8,2047,0.012719999998807907
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,fp8,2047,0.013093333691358566
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,float16,4095,0.01109333336353302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,float16,4095,0.015216000378131866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,fp8,4095,0.012906666845083237
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,fp8,4095,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,float16,8191,0.01211200033624967
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,float16,8191,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,fp8,8191,0.010938666760921478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,fp8,8191,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,float16,16383,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,float16,16383,0.018960000326236088
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,fp8,16383,0.012800000607967377
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,fp8,16383,0.017445333302021027
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,float16,32767,0.012096000214417776
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,fp8,32767,0.020975999534130096
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,float16,32767,0.021375998854637146
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,fp8,32767,0.012730666746695837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,float16,65535,0.012159999459981918
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,fp8,65535,0.012762666990359625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,float16,65535,0.02752000093460083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,float16,1,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,fp8,65535,0.025258667767047882
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,float16,1,0.009098666409651438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,fp8,1,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,fp8,1,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,float16,3,0.009077333534757296
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,float16,3,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,fp8,3,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,fp8,3,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,float16,7,0.010634666929642359
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,float16,131071,0.013125333935022354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,float16,7,0.008986666798591614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,fp8,7,0.009072000160813332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,64,128,1,float16,fp8,131071,0.013237333546082178
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,fp8,7,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,float16,131071,0.04554133117198944
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,fp8,15,0.009136000027259191
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,float16,15,0.009029333169261614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,64,0,1,float16,fp8,131071,0.035386666655540466
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,float16,15,0.00892800030608972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,fp8,15,0.009050666665037474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,float16,31,0.009093333035707474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,float16,31,0.008992000172535578
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,fp8,31,0.010058666889866194
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,fp8,31,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,float16,63,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,float16,63,0.009178666397929192
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,fp8,63,0.009541333342591921
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,fp8,63,0.010533332824707031
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,float16,127,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,float16,255,0.008853333070874214
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,float16,127,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,fp8,127,0.00897066667675972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,fp8,127,0.01109333336353302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,float16,255,0.009002666920423508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,fp8,255,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,fp8,255,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,float16,511,0.009098666409651438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,float16,511,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,fp8,511,0.009450666606426239
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,float16,2047,0.012938667088747025
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,fp8,511,0.010901333143313726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,float16,1023,0.00927466650803884
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,float16,1023,0.010981333752473196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,fp8,1023,0.010885333021481832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,fp8,1023,0.011071999867757162
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,float16,2047,0.014943999548753103
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,fp8,2047,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,fp8,2047,0.014981333166360855
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,float16,4095,0.011306667079528173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,float16,4095,0.015226667126019796
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,fp8,4095,0.012944000462690989
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,fp8,4095,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,float16,8191,0.012618667135636011
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,float16,8191,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,fp8,8191,0.01097600037852923
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,fp8,8191,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,float16,16383,0.011477333803971609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,float16,16383,0.021205333371957142
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,fp8,16383,0.012671999633312225
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,fp8,16383,0.0195573332409064
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,float16,32767,0.012821332861979803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,float16,32767,0.025434667865435284
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,fp8,32767,0.01101333275437355
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,fp8,32767,0.02334933231274287
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,float16,65535,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,float16,65535,0.041834667325019836
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,fp8,65535,0.033258666594823204
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,fp8,65535,0.012746666868527731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,float16,1,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,float16,1,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,fp8,1,0.011055999745925268
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,fp8,1,0.010901333143313726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,float16,3,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,float16,3,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,fp8,3,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,fp8,3,0.010981333752473196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,float16,7,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,float16,7,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,float16,131071,0.06292800108591716
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,fp8,131071,0.013877333452304205
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,float16,15,0.010570666442314783
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,fp8,7,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,64,0,1,float16,fp8,131071,0.05561066667238871
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,64,128,1,float16,float16,131071,0.014858666807413101
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,fp8,7,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,float16,15,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,fp8,15,0.01090666651725769
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,fp8,15,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,float16,31,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,float16,31,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,fp8,31,0.010949333508809408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,fp8,31,0.011071999867757162
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,float16,63,0.010650667051474253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,float16,63,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,fp8,63,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,fp8,63,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,float16,127,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,float16,127,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,fp8,127,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,fp8,127,0.01090666651725769
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,float16,255,0.010901333143313726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,float16,511,0.011098666737476984
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,float16,255,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,fp8,255,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,fp8,255,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,float16,511,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,fp8,511,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,fp8,511,0.01098666712641716
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,float16,1023,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,float16,1023,0.01310933381319046
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,fp8,1023,0.01157333329319954
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,fp8,2047,0.013072000195582708
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,fp8,2047,0.017093333105246227
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,fp8,1023,0.012597333639860153
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,float16,2047,0.01309866706530253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,float16,2047,0.01801066721479098
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,float16,4095,0.01312000056107839
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,float16,4095,0.02316266546646754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,fp8,4095,0.012944000462690989
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,fp8,4095,0.022085333863894146
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,float16,8191,0.012757333616415659
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,float16,8191,0.03957866628964742
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,fp8,8191,0.012869333227475485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,fp8,8191,0.03201066702604294
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,float16,1,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,float16,1,0.01108266661564509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,fp8,1,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,float16,16383,0.013066666821638743
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,fp8,1,0.012469333906968435
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,float16,3,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,float16,16383,0.061936000982920326
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,64,128,1,float16,fp8,16383,0.0129120002190272
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,float16,7,0.011061333119869232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,float16,3,0.01109333336353302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,64,0,1,float16,fp8,16383,0.05407466491063436
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,fp8,3,0.012746666868527731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,float16,15,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,fp8,3,0.011242666592200598
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,float16,15,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,float16,7,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,fp8,7,0.011183999478816986
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,fp8,7,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,fp8,15,0.01128000020980835
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,fp8,15,0.012304000556468964
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,float16,31,0.011776000261306763
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,float16,31,0.011120000233252844
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,fp8,31,0.012661332885424295
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,fp8,31,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,float16,63,0.011258666714032492
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,float16,63,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,fp8,63,0.011359999577204386
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,fp8,63,0.011120000233252844
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,float16,127,0.011450666934251785
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,float16,127,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,fp8,127,0.01110400011142095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,fp8,127,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,float16,255,0.011445333560307821
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,float16,255,0.011365332951148352
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,fp8,255,0.012650666137536367
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,fp8,255,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,float16,511,0.011231999844312668
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,float16,511,0.012752000242471695
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,fp8,511,0.011258666714032492
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,fp8,511,0.012794667234023413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,float16,1023,0.011109333485364914
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,float16,1023,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,fp8,1023,0.012757333616415659
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,fp8,1023,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,float16,2047,0.014789332946141561
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,float16,2047,0.02515200028816859
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,float16,4095,0.04035199930270513
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,fp8,2047,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,fp8,2047,0.023365333676338196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,float16,4095,0.014826666563749313
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,fp8,4095,0.015040000279744467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,fp8,4095,0.03223466624816259
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,float16,8191,0.06201600035031637
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,float16,8191,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,fp8,8191,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,fp8,8191,0.05596800148487091
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,float16,1,0.014469332993030548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,float16,1,0.013130666067202887
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,fp8,1,0.014229333649079004
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,float16,16383,0.01482133318980535
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,fp8,1,0.014432000617186228
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,float16,16383,0.10739733775456746
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,float16,3,0.014245333770910898
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,64,128,1,float16,fp8,16383,0.01479999969402949
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,float16,3,0.013167999684810638
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,64,0,1,float16,fp8,16383,0.09332266449928284
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,fp8,3,0.014661333213249842
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,float16,7,0.014138666292031607
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,fp8,3,0.014474666366974512
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,fp8,15,0.014815999815861383
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,fp8,7,0.013445333888133367
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,float16,7,0.012863999853531519
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,fp8,7,0.012960000584522883
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,float16,15,0.013125333935022354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,float16,15,0.014815999815861383
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,fp8,15,0.01350933313369751
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,float16,31,0.014746667196353277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,float16,31,0.014981333166360855
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,fp8,31,0.013130666067202887
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,fp8,31,0.013434667140245438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,float16,63,0.013167999684810638
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,float16,63,0.014912000546852747
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,fp8,63,0.013807999591032663
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,fp8,63,0.014138666292031607
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,float16,127,0.013125333935022354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,float16,127,0.014080000420411428
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,fp8,127,0.012784000486135483
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,fp8,127,0.012826666235923767
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,float16,255,0.015024000157912573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,float16,255,0.013760000467300415
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,fp8,255,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,fp8,255,0.012981332838535309
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,float16,511,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,float16,511,0.016805333395799
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,fp8,1023,0.013034666577974955
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,fp8,511,0.013514666507641474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,fp8,511,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,float16,1023,0.014767999450365702
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,float16,2047,0.03955733279387156
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,float16,1023,0.023130667706330616
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,fp8,1023,0.021349333226680756
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,float16,2047,0.016864000509182613
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,fp8,2047,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,fp8,2047,0.033376000821590424
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,float16,4095,0.01682666689157486
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,float16,4095,0.0637546678384145
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,fp8,4095,0.017322666943073273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,fp8,4095,0.056074668963750206
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,float16,8191,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,float16,8191,0.10729066530863444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,fp8,8191,0.016730666160583496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,fp8,8191,0.0939520001411438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,float16,1,0.01101333275437355
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,float16,1,0.011178666104873022
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,fp8,1,0.013002666334311167
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,float16,16383,0.01724799970785777
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,fp8,1,0.01101333275437355
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,float16,3,0.011141333729028702
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,float16,16383,0.19751999775568643
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,float16,3,0.011312000453472137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,float16,7,0.012773333738247553
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,64,128,1,float16,fp8,16383,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,fp8,3,0.011087999989589056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,64,0,1,float16,fp8,16383,0.1687999963760376
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,fp8,3,0.011146667102972666
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,float16,7,0.010944000134865442
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,fp8,7,0.011226666470368704
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,fp8,7,0.011109333485364914
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,float16,15,0.012752000242471695
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,float16,15,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,fp8,15,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,fp8,15,0.010992000500361124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,float16,31,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,float16,31,0.010970667004585266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,fp8,31,0.011071999867757162
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,fp8,31,0.01108266661564509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,float16,63,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,float16,63,0.011098666737476984
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,fp8,63,0.01097600037852923
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,fp8,63,0.011701333026091257
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,float16,127,0.01108266661564509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,float16,127,0.01211200033624967
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,fp8,127,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,fp8,255,0.011530666301647821
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,fp8,127,0.011173332730929056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,float16,255,0.011114666859308878
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,float16,255,0.011317333827416102
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,fp8,255,0.011168000598748526
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,float16,511,0.01109333336353302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,float16,511,0.01309866706530253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,fp8,511,0.011077333241701126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,fp8,511,0.01312000056107839
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,float16,1023,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,float16,1023,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,fp8,1023,0.012890666723251343
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,fp8,1023,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,fp8,2047,0.0229120006163915
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,float16,2047,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,float16,2047,0.02502399931351344
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,fp8,2047,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,float16,4095,0.014949332922697067
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,float16,4095,0.040250666439533234
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,float16,1,0.01313599944114685
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,64,128,1,float16,fp8,4095,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,float16,1,0.014607999473810196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,float16,3,0.013157332936922709
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,64,0,1,float16,fp8,4095,0.033402666449546814
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,fp8,3,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,fp8,1,0.01451733335852623
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,fp8,1,0.01320533330241839
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,float16,3,0.01312000056107839
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,fp8,3,0.012986666212479273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,float16,7,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,float16,7,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,fp8,7,0.013066666821638743
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,fp8,7,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,float16,15,0.013056000073750814
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,float16,15,0.01421333352724711
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,fp8,15,0.012997332960367203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,fp8,15,0.013450667262077332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,float16,31,0.01332266628742218
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,float16,31,0.013056000073750814
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,fp8,31,0.012991999586423239
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,fp8,31,0.01309866706530253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,float16,63,0.014901333798964819
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,float16,63,0.013045333325862885
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,fp8,63,0.013866666704416275
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,fp8,63,0.013242666920026144
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,float16,127,0.014490666488806406
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,float16,127,0.013013333082199097
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,fp8,127,0.013034666577974955
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,fp8,127,0.013770667215188345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,float16,255,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,float16,255,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,float16,511,0.017018667111794155
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,fp8,255,0.014736000448465347
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,fp8,255,0.01322666679819425
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,float16,511,0.013295999417702356
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,fp8,511,0.015050667027632395
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,fp8,511,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,float16,1023,0.014938666174809137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,float16,1023,0.023221333821614582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,fp8,1023,0.01479999969402949
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,fp8,1023,0.021061333517233532
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,float16,2047,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,float16,2047,0.04165866722663244
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,fp8,2047,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,fp8,2047,0.033359999457995095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,float16,4095,0.0639466643333435
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,float16,4095,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,float16,1,0.01732800031701724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,float16,1,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,float16,3,0.01886933296918869
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,64,128,1,float16,fp8,4095,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,fp8,3,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,fp8,1,0.017093333105246227
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,64,0,1,float16,fp8,4095,0.056032001972198486
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,fp8,1,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,float16,3,0.017792000124851864
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,fp8,3,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,float16,7,0.019109333554903667
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,float16,7,0.018298666924238205
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,fp8,7,0.01704000060757001
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,fp8,7,0.017237332959969837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,float16,15,0.01736533393462499
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,float16,15,0.017237332959969837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,fp8,31,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,fp8,15,0.017258666455745697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,fp8,15,0.01714666684468587
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,float16,31,0.018981333822011948
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,float16,31,0.017935999979575474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,fp8,31,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,float16,63,0.017103999853134155
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,float16,63,0.017045332739750545
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,fp8,63,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,fp8,63,0.017317333569129307
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,float16,127,0.01727466657757759
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,float16,127,0.018960000326236088
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,fp8,127,0.017125333348910015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,fp8,255,0.01706133286158244
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,fp8,127,0.017312000195185345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,float16,255,0.017173333714405697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,float16,255,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,fp8,255,0.01754133279124896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,float16,511,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,float16,511,0.023530667026837666
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,fp8,511,0.017653333644072216
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,fp8,1023,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,fp8,511,0.023103999594847362
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,float16,1023,0.018917333334684372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,float16,1023,0.03634133438269297
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,fp8,1023,0.031189332405726116
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,float16,2047,0.021274665991465252
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,float16,2047,0.05981333553791046
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,fp8,2047,0.021125334004561108
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,fp8,2047,0.053632001082102455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,float16,4095,0.021183999876181286
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,float16,4095,0.09871466954549153
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,64,128,1,float16,fp8,4095,0.021274665991465252
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,64,128,1,float16,float16,1,0.014864000181357065
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,64,0,1,float16,fp8,4095,0.08717866738637288
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,64,0,1,float16,float16,1,0.01332266628742218
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,64,128,1,float16,fp8,1,0.01309866706530253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,64,0,1,float16,fp8,1,0.012901333471139273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,64,128,1,float16,float16,3,0.013013333082199097
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,64,128,1,float16,float16,7,0.013855999956528345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,64,0,1,float16,float16,3,0.013023999830087027
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,64,128,1,float16,fp8,3,0.01482133318980535
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,64,0,1,float16,fp8,3,0.014533333480358124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,64,0,1,float16,float16,7,0.013429333766301474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,64,128,1,float16,fp8,7,0.013194666554530462
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,64,0,1,float16,fp8,7,0.01310933381319046
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,64,128,1,float16,float16,15,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,64,0,1,float16,float16,15,0.012906666845083237
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,64,128,1,float16,fp8,15,0.013237333546082178
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,64,0,1,float16,fp8,15,0.015013333410024643
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,64,128,1,float16,float16,31,0.014746667196353277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,64,0,1,float16,float16,31,0.012991999586423239
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,64,128,1,float16,fp8,31,0.013125333935022354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,64,0,1,float16,fp8,31,0.013189333180586496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,64,128,1,float16,float16,63,0.012885333349307379
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,64,0,1,float16,float16,63,0.01313599944114685
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,64,128,1,float16,fp8,63,0.01532799998919169
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,64,0,1,float16,fp8,63,0.014666666587193808
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,64,128,1,float16,float16,127,0.013338666409254074
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,64,0,1,float16,float16,127,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,64,128,1,float16,fp8,127,0.01303999995191892
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,64,0,1,float16,fp8,127,0.013141332815090815
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,64,128,1,float16,fp8,255,0.014815999815861383
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,64,128,1,float16,float16,255,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,64,0,1,float16,float16,255,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,64,0,1,float16,fp8,255,0.01293333371480306
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,64,128,1,float16,float16,511,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,64,0,1,float16,float16,511,0.01685333376129468
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,64,128,1,float16,fp8,511,0.013130666067202887
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,64,0,1,float16,fp8,511,0.016879999389251072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,64,128,1,float16,float16,1023,0.014752000570297241
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,64,0,1,float16,float16,1023,0.023311999936898548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,64,128,1,float16,fp8,1023,0.014885333677132925
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,64,0,1,float16,fp8,1023,0.021146667500336964
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,64,128,1,float16,float16,2047,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,64,0,1,float16,float16,2047,0.04067199925581614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,64,128,1,float16,float16,1,0.018986667195955913
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,64,128,1,float16,fp8,2047,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,64,0,1,float16,float16,1,0.017322666943073273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,64,0,1,float16,fp8,2047,0.03350933392842611
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,64,128,1,float16,fp8,1,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,64,0,1,float16,fp8,1,0.01710933322707812
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,64,128,1,float16,float16,3,0.01718933383623759
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,64,0,1,float16,float16,3,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,64,128,1,float16,fp8,3,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,64,0,1,float16,fp8,3,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,64,128,1,float16,float16,7,0.019082666685183842
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,64,0,1,float16,float16,7,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,64,128,1,float16,fp8,7,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,64,0,1,float16,fp8,7,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,64,128,1,float16,float16,15,0.017242666333913803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,64,0,1,float16,float16,15,0.017125333348910015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,64,128,1,float16,fp8,15,0.016885332763195038
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,64,0,1,float16,fp8,15,0.017231999586025875
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,64,0,1,float16,fp8,31,0.01729600007335345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,64,128,1,float16,float16,31,0.01732800031701724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,64,0,1,float16,float16,31,0.017407999684413273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,64,128,1,float16,fp8,31,0.017237332959969837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,64,128,1,float16,float16,63,0.017263999829689663
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,64,0,1,float16,float16,63,0.017114666601022083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,64,128,1,float16,fp8,63,0.01714133347074191
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,64,0,1,float16,fp8,63,0.017968000223239262
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,64,128,1,float16,float16,127,0.018954666952292126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,64,0,1,float16,float16,127,0.017194667210181553
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,64,128,1,float16,fp8,127,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,64,128,1,float16,fp8,255,0.017130666722853977
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,64,0,1,float16,fp8,127,0.017162666966517765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,64,128,1,float16,float16,255,0.018917333334684372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,64,0,1,float16,float16,255,0.01716800034046173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,64,0,1,float16,fp8,255,0.017866666118303936
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,64,128,1,float16,float16,511,0.01889066646496455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,64,0,1,float16,float16,511,0.023285334308942158
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,64,128,1,float16,fp8,511,0.0173333336909612
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,64,0,1,float16,fp8,511,0.02128000060717265
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,64,128,1,float16,float16,1023,0.018954666952292126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,64,0,1,float16,float16,1023,0.03632533301909765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,64,128,1,float16,fp8,1023,0.017301333447297413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,64,0,1,float16,fp8,1023,0.031354665756225586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,64,128,1,float16,float16,2047,0.02102400114138921
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,64,0,1,float16,float16,2047,0.059936001896858215
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,64,128,1,float16,float16,1,0.027098665634791057
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,64,128,1,float16,fp8,2047,0.02092266579469045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,64,0,1,float16,float16,1,0.0271573339899381
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,64,0,1,float16,fp8,2047,0.05300266544024149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,64,128,1,float16,fp8,1,0.02516266703605652
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,64,0,1,float16,fp8,1,0.025077333052953083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,64,128,1,float16,float16,3,0.02619733413060506
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,64,0,1,float16,float16,3,0.02722666660944621
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,64,128,1,float16,fp8,3,0.025407999753952026
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,64,0,1,float16,fp8,3,0.02510400116443634
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,64,128,1,float16,float16,15,0.026154667139053345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,64,128,1,float16,float16,7,0.02722666660944621
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,64,0,1,float16,float16,7,0.025733334322770435
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,64,128,1,float16,fp8,7,0.02502399931351344
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,64,0,1,float16,fp8,7,0.02502399931351344
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,64,0,1,float16,float16,15,0.026933332284291584
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,64,128,1,float16,fp8,15,0.02510933329661687
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,64,0,1,float16,fp8,15,0.025077333052953083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,64,128,1,float16,float16,31,0.026575999955336254
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,64,0,1,float16,float16,31,0.027295999228954315
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,64,128,1,float16,fp8,31,0.025018667181332905
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,64,0,1,float16,fp8,31,0.025077333052953083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,64,0,1,float16,fp8,63,0.025093334416548412
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,64,128,1,float16,float16,63,0.027162666122118633
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,64,0,1,float16,float16,63,0.027114666998386383
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,64,128,1,float16,fp8,63,0.025311999022960663
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,64,128,1,float16,float16,127,0.025802666942278545
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,64,0,1,float16,float16,127,0.027093333502610523
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,64,128,1,float16,fp8,127,0.025413334369659424
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,64,128,1,float16,fp8,255,0.02534399926662445
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,64,0,1,float16,fp8,127,0.025439999997615814
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,64,128,1,float16,float16,255,0.027280000348885853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,64,0,1,float16,float16,255,0.0272533322374026
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,64,0,1,float16,fp8,255,0.025199999411900837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,64,128,1,float16,float16,511,0.027248000105222065
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,64,0,1,float16,float16,511,0.0399893323580424
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,64,128,1,float16,fp8,511,0.026880001028378803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,64,0,1,float16,fp8,511,0.03558400024970373
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,64,0,1,float16,float16,1023,0.06005866825580597
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,64,128,1,float16,fp8,1023,0.025594666600227356
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,64,128,1,float16,float16,1023,0.027503999571005504
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,64,0,1,float16,fp8,1023,0.053685332338015236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,float16,1,0.010138666878143946
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,64,128,1,float16,float16,2047,0.029605334003766377
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,float16,1,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,64,0,1,float16,float16,2047,0.1032533347606659
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,fp8,1,0.008789333204428354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,fp8,1,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,64,128,1,float16,fp8,2047,0.029290666182835896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,float16,3,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,64,0,1,float16,fp8,2047,0.09060266613960266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,float16,3,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,fp8,3,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,fp8,3,0.009002666920423508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,float16,7,0.010389333590865135
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,float16,7,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,fp8,7,0.00895999992887179
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,fp8,15,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,float16,31,0.00915733352303505
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,fp8,7,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,float16,15,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,float16,15,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,fp8,15,0.009082666908701261
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,float16,31,0.008912000184257826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,fp8,31,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,fp8,31,0.009984000275532404
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,float16,63,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,float16,63,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,fp8,63,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,fp8,63,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,float16,127,0.009082666908701261
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,float16,127,0.009039999917149544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,fp8,127,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,fp8,127,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,float16,255,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,float16,255,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,fp8,255,0.00980266680320104
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,fp8,255,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,float16,511,0.009381333366036415
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,float16,511,0.009770666559537252
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,fp8,511,0.009765333185593287
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,fp8,511,0.010911999891201654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,float16,1023,0.010938666760921478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,float16,1023,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,fp8,1023,0.009541333342591921
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,float16,4095,0.012896000097195307
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,fp8,1023,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,float16,2047,0.011061333119869232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,fp8,4095,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,float16,2047,0.01340266689658165
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,fp8,2047,0.011823999385039011
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,fp8,2047,0.013072000195582708
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,float16,4095,0.015040000279744467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,fp8,4095,0.012602667013804117
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,float16,8191,0.012890666723251343
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,float16,8191,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,fp8,8191,0.012794667234023413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,fp8,8191,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,float16,16383,0.011120000233252844
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,float16,16383,0.01934933289885521
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,fp8,16383,0.01139733319481214
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,fp8,16383,0.01714133347074191
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,float16,32767,0.012885333349307379
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,float16,32767,0.02109333376089732
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,fp8,32767,0.012693333129088083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,fp8,32767,0.021045332153638203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,float16,65535,0.02720000098148982
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,float16,65535,0.01121066634853681
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,float16,1,0.010586666564146677
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,fp8,65535,0.011328000575304031
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,fp8,65535,0.025263999899228413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,float16,1,0.008810666700204214
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,fp8,1,0.010885333021481832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,fp8,1,0.009061333412925402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,float16,3,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,float16,3,0.00901333304742972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,fp8,3,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,fp8,3,0.009045333291093508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,float16,7,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,float16,131071,0.013199999928474426
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,float16,7,0.009173333023985228
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,fp8,7,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,fp8,15,0.009301333377758661
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,fp8,7,0.00892800030608972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,float16,31,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,fp8,131071,0.03524799893299738
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,64,0,1,float16,float16,131071,0.043920000394185386
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,float16,15,0.010656000425418219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,64,128,1,float16,fp8,131071,0.013882666826248169
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,float16,15,0.009866666669646898
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,fp8,15,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,float16,31,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,fp8,31,0.010890666395425797
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,fp8,31,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,float16,63,0.008912000184257826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,float16,63,0.008965333302815756
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,fp8,63,0.01007466639081637
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,fp8,63,0.010496000448862711
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,float16,127,0.009045333291093508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,float16,127,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,fp8,127,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,fp8,127,0.009354666496316591
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,float16,255,0.01007466639081637
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,float16,255,0.008965333302815756
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,fp8,255,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,fp8,255,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,float16,511,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,float16,511,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,fp8,511,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,fp8,511,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,float16,1023,0.009695999945203463
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,float16,2047,0.014064000298579534
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,float16,1023,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,fp8,1023,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,fp8,1023,0.011152000476916632
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,float16,4095,0.015040000279744467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,float16,2047,0.012117333710193634
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,fp8,2047,0.012752000242471695
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,fp8,2047,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,float16,4095,0.011071999867757162
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,fp8,4095,0.012869333227475485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,fp8,8191,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,fp8,4095,0.015824000040690105
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,float16,16383,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,float16,16383,0.020928000410397846
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,float16,8191,0.011941333611806234
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,float16,8191,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,fp8,8191,0.012842666357755661
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,fp8,16383,0.012997332960367203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,fp8,16383,0.01929066702723503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,float16,32767,0.025402667621771496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,float16,32767,0.013104000439246496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,fp8,32767,0.012714666624863943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,fp8,32767,0.023376000424226124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,float16,65535,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,float16,65535,0.041722665230433144
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,float16,1,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,float16,1,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,fp8,65535,0.010928000013033548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,fp8,1,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,fp8,65535,0.033439998825391136
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,fp8,1,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,float16,3,0.009082666908701261
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,float16,3,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,fp8,3,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,fp8,3,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,float16,7,0.008954666554927826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,float16,7,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,fp8,7,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,fp8,131071,0.05417066812515259
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,fp8,7,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,float16,131071,0.013114667187134424
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,float16,15,0.008869333192706108
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,64,0,1,float16,float16,131071,0.06419733166694641
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,64,128,1,float16,fp8,131071,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,float16,15,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,fp8,15,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,fp8,15,0.010191999996701876
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,float16,31,0.009098666409651438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,fp8,31,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,float16,31,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,fp8,31,0.00960533320903778
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,float16,63,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,float16,63,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,fp8,63,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,fp8,63,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,float16,127,0.00983466642598311
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,float16,127,0.010591999938090643
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,fp8,127,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,fp8,127,0.009306666751702627
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,float16,255,0.008896000062425932
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,float16,255,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,fp8,255,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,float16,1023,0.008933333059151968
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,float16,1023,0.01091733326514562
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,fp8,1023,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,fp8,255,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,float16,511,0.01002133327225844
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,float16,511,0.01097600037852923
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,fp8,511,0.010981333752473196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,fp8,511,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,fp8,1023,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,float16,2047,0.012175999581813812
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,float16,2047,0.013130666067202887
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,fp8,2047,0.012991999586423239
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,float16,8191,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,fp8,2047,0.014938666174809137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,float16,4095,0.012778667112191519
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,float16,4095,0.01543466622630755
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,fp8,8191,0.017301333447297413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,fp8,4095,0.01108266661564509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,fp8,4095,0.014938666174809137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,float16,8191,0.01788266624013583
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,float16,16383,0.0234400009115537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,fp8,8191,0.012879999975363413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,float16,16383,0.012709333250919977
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,fp8,16383,0.012778667112191519
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,fp8,16383,0.02312533309062322
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,float16,32767,0.010992000500361124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,float16,32767,0.03985599925120672
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,fp8,32767,0.011765333513418833
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,fp8,32767,0.03164266546567281
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,float16,65535,0.06402666866779327
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,float16,65535,0.012693333129088083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,fp8,65535,0.053685332338015236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,fp8,65535,0.012917333592971167
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,64,128,1,float16,float16,1,0.01912533367673556
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,64,0,1,float16,float16,1,0.018160000443458557
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,64,128,1,float16,fp8,1,0.018250666558742523
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,float16,131071,0.014912000546852747
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,64,128,1,float16,fp8,131071,0.014981333166360855
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,64,0,1,float16,fp8,1,0.018778666853904724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,float16,131071,0.11107732852300008
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,64,128,1,float16,float16,3,0.018053332964579265
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,64,0,1,float16,fp8,131071,0.09471467137336731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,64,0,1,float16,float16,3,0.01887999971707662
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,64,128,1,float16,fp8,3,0.018944000204404194
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,64,0,1,float16,fp8,3,0.01893866683046023
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,64,128,1,float16,float16,7,0.018800000349680584
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,64,128,1,float16,fp8,7,0.017727999637524288
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,64,0,1,float16,float16,7,0.01854933301607768
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,64,0,1,float16,fp8,7,0.01931200052301089
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,64,128,1,float16,float16,15,0.018346666047970455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,64,0,1,float16,float16,15,0.01897066707412402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,64,128,1,float16,fp8,15,0.019002666076024372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,64,0,1,float16,fp8,15,0.01710933322707812
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,64,128,1,float16,float16,31,0.01883200059334437
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,64,0,1,float16,float16,31,0.018933333456516266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,64,128,1,float16,fp8,31,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,64,0,1,float16,fp8,31,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,64,128,1,float16,float16,63,0.018976000448067982
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,64,0,1,float16,float16,63,0.018981333822011948
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,64,128,1,float16,fp8,63,0.017103999853134155
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,64,0,1,float16,fp8,63,0.01810666670401891
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,64,128,1,float16,float16,127,0.01894933357834816
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,64,0,1,float16,float16,127,0.01894933357834816
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,64,0,1,float16,fp8,127,0.017279999951521557
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,64,128,1,float16,fp8,127,0.017082666357358296
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,64,128,1,float16,float16,255,0.01904533306757609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,64,0,1,float16,float16,255,0.018650667121013004
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,64,128,1,float16,fp8,255,0.018325333793958027
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,64,0,1,float16,fp8,255,0.017743999759356182
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,64,128,1,float16,float16,511,0.018917333334684372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,64,0,1,float16,float16,511,0.023552000522613525
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,64,128,1,float16,fp8,511,0.017952000101407368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,64,0,1,float16,fp8,511,0.02310933421055476
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,64,128,1,float16,float16,1023,0.018944000204404194
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,64,0,1,float16,float16,1023,0.03604800005753835
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,64,128,1,float16,float16,1,0.02737066646416982
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,64,128,1,float16,fp8,1023,0.01829333355029424
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,64,0,1,float16,fp8,1023,0.031184000273545582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,64,128,1,float16,fp8,1,0.025087999800841015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,64,0,1,float16,float16,1,0.027258666853109997
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,64,0,1,float16,fp8,1,0.02510400116443634
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,64,128,1,float16,float16,3,0.02716800073782603
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,64,128,1,float16,fp8,3,0.025434667865435284
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,64,0,1,float16,fp8,3,0.02533866713444392
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,64,0,1,float16,float16,3,0.027104000250498455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,64,128,1,float16,float16,7,0.02735466758410136
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,64,0,1,float16,float16,7,0.027136000494162243
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,64,128,1,float16,fp8,7,0.025461333493391674
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,64,0,1,float16,fp8,7,0.025381334125995636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,64,128,1,float16,float16,15,0.02733866622050603
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,64,0,1,float16,float16,15,0.02740799884001414
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,64,128,1,float16,fp8,15,0.025258667767047882
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,64,0,1,float16,fp8,15,0.026799999177455902
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,64,128,1,float16,float16,31,0.02718399961789449
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,64,0,1,float16,float16,31,0.0271519993742307
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,64,128,1,float16,fp8,31,0.025381334125995636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,64,0,1,float16,fp8,31,0.025199999411900837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,64,128,1,float16,float16,63,0.027136000494162243
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,64,0,1,float16,float16,63,0.02739199995994568
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,64,128,1,float16,fp8,63,0.02513599892457326
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,64,0,1,float16,fp8,63,0.026170666019121807
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,64,128,1,float16,float16,127,0.02720533311367035
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,64,0,1,float16,float16,127,0.027237333357334137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,64,128,1,float16,fp8,127,0.025306666890780132
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,64,0,1,float16,fp8,127,0.025050667424996693
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,64,128,1,float16,float16,255,0.027056001126766205
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,64,0,1,float16,float16,255,0.02718399961789449
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,64,128,1,float16,fp8,255,0.027034667630990345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,64,0,1,float16,fp8,255,0.025429333249727886
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,64,128,1,float16,float16,511,0.027130665878454845
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,64,0,1,float16,float16,511,0.04074666649103165
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,64,128,1,float16,fp8,511,0.026101333399613697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,64,0,1,float16,fp8,511,0.03531199942032496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,64,128,1,float16,float16,1023,0.027434666951497395
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,64,0,1,float16,float16,1023,0.06097066899140676
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,64,128,1,float16,float16,1,0.043562665581703186
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,64,128,1,float16,fp8,1023,0.027093333502610523
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,64,0,1,float16,float16,1,0.04390400151411692
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,64,0,1,float16,fp8,1023,0.05376533170541128
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,64,128,1,float16,fp8,1,0.041434665520985924
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,64,0,1,float16,fp8,1,0.041589332123597465
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,64,128,1,float16,float16,3,0.04346133271853129
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,64,0,1,float16,float16,3,0.04377066592375437
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,64,128,1,float16,fp8,3,0.04147200038035711
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,64,0,1,float16,fp8,3,0.0414986660083135
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,64,0,1,float16,float16,7,0.04383466641108195
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,64,128,1,float16,float16,7,0.04378666480382284
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,64,128,1,float16,fp8,7,0.041509332756201424
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,64,0,1,float16,fp8,7,0.03977599988381068
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,64,128,1,float16,float16,15,0.043418665726979576
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,64,0,1,float16,float16,15,0.04390400151411692
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,64,128,1,float16,fp8,15,0.04146666576464971
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,64,0,1,float16,fp8,15,0.04145599901676178
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,64,128,1,float16,float16,31,0.04355733096599579
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,64,0,1,float16,float16,31,0.04399466514587402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,64,128,1,float16,fp8,31,0.039919999738534294
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,64,0,1,float16,fp8,31,0.04137066751718521
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,64,128,1,float16,float16,63,0.04383466641108195
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,64,0,1,float16,float16,63,0.043509334325790405
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,64,128,1,float16,fp8,63,0.04172799984614054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,64,0,1,float16,fp8,63,0.04050133377313614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,64,128,1,float16,float16,127,0.043824002146720886
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,64,0,1,float16,float16,127,0.04376000165939331
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,64,128,1,float16,fp8,127,0.04154133299986521
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,64,0,1,float16,fp8,127,0.04152533411979675
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,64,128,1,float16,float16,255,0.04548266530036926
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,64,0,1,float16,float16,255,0.04553600152333578
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,64,128,1,float16,fp8,255,0.04309333364168803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,64,0,1,float16,fp8,255,0.039706667264302574
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,64,128,1,float16,float16,511,0.04554666578769684
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,64,0,1,float16,float16,511,0.06836266815662384
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,64,128,1,float16,fp8,511,0.042208001017570496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,64,0,1,float16,fp8,511,0.06192533175150553
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,64,128,1,float16,float16,1023,0.04576533536116282
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,64,0,1,float16,float16,1023,0.10699199636777242
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,64,128,1,float16,fp8,1023,0.04286933441956838
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,64,0,1,float16,fp8,1023,0.09482133388519287
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,64,128,1,float16,float16,1,0.027477333943049114
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,64,0,1,float16,float16,1,0.02754666656255722
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,64,128,1,float16,fp8,1,0.027072000006834667
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,64,0,1,float16,fp8,1,0.02722666660944621
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,64,128,1,float16,float16,3,0.02805333336194356
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,64,0,1,float16,float16,3,0.027477333943049114
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,64,128,1,float16,fp8,3,0.027082666754722595
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,64,0,1,float16,fp8,3,0.0260959987839063
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,64,128,1,float16,float16,7,0.027509334186712902
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,64,0,1,float16,float16,7,0.027503999571005504
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,64,128,1,float16,fp8,7,0.027087998886903126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,64,0,1,float16,fp8,7,0.025818665822347004
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,64,128,1,float16,float16,15,0.02741866558790207
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,64,0,1,float16,float16,15,0.027530667682488758
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,64,128,1,float16,fp8,15,0.027087998886903126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,64,0,1,float16,fp8,15,0.025770666698614757
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,64,128,1,float16,float16,31,0.02722666660944621
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,64,0,1,float16,float16,31,0.027349332968393963
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,64,128,1,float16,fp8,31,0.027258666853109997
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,64,0,1,float16,fp8,31,0.025536000728607178
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,64,128,1,float16,float16,63,0.02740799884001414
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,64,0,1,float16,float16,63,0.027509334186712902
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,64,128,1,float16,fp8,63,0.02735466758410136
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,64,0,1,float16,fp8,63,0.026474667092164356
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,64,128,1,float16,float16,127,0.027445333699385326
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,64,0,1,float16,float16,127,0.0273333340883255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,64,128,1,float16,fp8,127,0.027258666853109997
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,64,0,1,float16,fp8,127,0.025397333006064098
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,64,128,1,float16,float16,255,0.027797333896160126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,64,0,1,float16,float16,255,0.027813332776228588
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,64,128,1,float16,fp8,255,0.027471999327341717
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,64,0,1,float16,fp8,255,0.027322667340437572
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,64,0,1,float16,float16,511,0.04142933338880539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,64,128,1,float16,fp8,511,0.02716800073782603
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,64,0,1,float16,fp8,511,0.0353973334034284
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,64,128,1,float16,float16,1,0.043680002291997276
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,64,0,1,float16,float16,1,0.04573333263397217
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,64,128,1,float16,fp8,1,0.041536000867684685
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,64,0,1,float16,fp8,1,0.04156800111134847
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,64,128,1,float16,float16,3,0.04387733340263367
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,64,0,1,float16,float16,3,0.04491200049718221
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,64,128,1,float16,fp8,3,0.04145599901676178
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,64,0,1,float16,fp8,3,0.041749333341916404
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,64,128,1,float16,float16,7,0.04566933214664459
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,64,0,1,float16,float16,7,0.043807998299598694
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,64,128,1,float16,fp8,7,0.04181333382924398
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,64,0,1,float16,fp8,7,0.04140799989302953
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,64,128,1,float16,float16,15,0.043840001026789345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,64,0,1,float16,float16,15,0.04383466641108195
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,64,128,1,float16,fp8,15,0.04181333382924398
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,64,0,1,float16,fp8,15,0.04185600082079569
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,64,128,1,float16,float16,31,0.04390400151411692
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,64,0,1,float16,float16,31,0.045647998650868736
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,64,128,1,float16,fp8,31,0.04146133363246918
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,64,128,1,float16,float16,511,0.029135999580224354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,64,0,1,float16,fp8,31,0.04173333446184794
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,64,128,1,float16,float16,63,0.043791999419530235
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,64,0,1,float16,float16,63,0.04568000137805939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,64,128,1,float16,fp8,63,0.04164800047874451
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,64,0,1,float16,fp8,63,0.041493333876132965
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,64,128,1,float16,float16,127,0.043824002146720886
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,64,0,1,float16,float16,127,0.04377600053946177
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,64,128,1,float16,fp8,127,0.041759997606277466
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,64,0,1,float16,fp8,127,0.041493333876132965
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,64,128,1,float16,float16,255,0.04597333570321401
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,64,0,1,float16,float16,255,0.04660266637802124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,64,128,1,float16,fp8,255,0.043562665581703186
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,64,0,1,float16,fp8,255,0.041477332512537636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,64,128,1,float16,float16,511,0.045461331804593406
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,64,0,1,float16,float16,511,0.06875200072924297
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,64,128,1,float16,fp8,511,0.04347200194994608
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,64,128,1,float16,float16,1,0.0782773345708847
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,64,0,1,float16,float16,1,0.07888533174991608
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,64,0,1,float16,fp8,511,0.06228266656398773
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,64,128,1,float16,fp8,1,0.07276799778143565
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,64,0,1,float16,fp8,1,0.07231466472148895
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,64,128,1,float16,float16,3,0.07860800127188365
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,64,0,1,float16,float16,3,0.07843733330567677
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,64,128,1,float16,fp8,3,0.07225599884986877
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,64,0,1,float16,fp8,3,0.07236800094445546
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,64,128,1,float16,float16,7,0.07860800127188365
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,64,0,1,float16,float16,7,0.07826666533946991
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,64,128,1,float16,fp8,7,0.07235200206438701
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,64,0,1,float16,fp8,7,0.07254933317502339
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,64,128,1,float16,float16,15,0.07857066889603932
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,64,0,1,float16,float16,15,0.07834133505821228
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,64,128,1,float16,fp8,15,0.07225599884986877
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,64,0,1,float16,fp8,15,0.0721919983625412
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,64,128,1,float16,float16,31,0.07879466811815898
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,64,0,1,float16,float16,31,0.07857066889603932
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,64,128,1,float16,fp8,31,0.07225066423416138
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,64,0,1,float16,fp8,31,0.07260266443093617
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,64,128,1,float16,float16,63,0.07858133316040039
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,64,0,1,float16,float16,63,0.07844799757003784
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,64,128,1,float16,fp8,63,0.07217599948247273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,64,128,1,float16,float16,127,0.07996800045172374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,64,0,1,float16,float16,127,0.07845866680145264
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,64,128,1,float16,fp8,127,0.07216000060240428
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,64,0,1,float16,fp8,127,0.07216533521811168
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,64,128,1,float16,float16,255,0.08240533371766408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,64,0,1,float16,float16,255,0.08038400113582611
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,64,128,1,float16,fp8,255,0.07449600100517273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,float16,1,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,64,0,1,float16,fp8,255,0.07250666618347168
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,float16,1,0.008912000184257826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,fp8,1,0.009754666437705358
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,64,128,1,float16,float16,511,0.08106666803359985
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,fp8,1,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,64,0,1,float16,float16,511,0.12345600128173828
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,float16,3,0.009072000160813332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,float16,3,0.00891733355820179
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,fp8,3,0.008986666798591614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,fp8,3,0.010629333555698395
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,64,0,1,float16,fp8,511,0.11122133334477742
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,float16,7,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,float16,7,0.00891733355820179
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,fp8,7,0.009018666421373686
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,fp8,7,0.009455999980370203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,float16,15,0.009061333412925402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,float16,15,0.009119999905427298
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,fp8,15,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,fp8,15,0.010933333386977514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,float16,31,0.009072000160813332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,float16,31,0.009045333291093508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,fp8,31,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,fp8,31,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,float16,63,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,float16,63,0.010650667051474253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,fp8,63,0.008837333569924036
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,64,0,1,float16,fp8,63,0.072202667593956
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,fp8,63,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,float16,127,0.008858666444818178
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,float16,127,0.008986666798591614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,fp8,127,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,fp8,127,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,float16,255,0.009061333412925402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,float16,255,0.01089599976936976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,fp8,255,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,fp8,255,0.010015999898314476
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,float16,511,0.009050666665037474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,float16,511,0.01091733326514562
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,fp8,511,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,fp8,1023,0.011061333119869232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,fp8,511,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,float16,1023,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,float16,1023,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,fp8,1023,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,64,128,1,float16,fp8,511,0.07461866736412048
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,float16,2047,0.012874666601419449
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,float16,2047,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,fp8,2047,0.012831999609867731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,fp8,4095,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,fp8,2047,0.01482133318980535
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,float16,4095,0.012805332740147909
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,float16,4095,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,fp8,4095,0.013034666577974955
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,float16,8191,0.012965332716703415
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,float16,8191,0.01741333305835724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,fp8,8191,0.012778667112191519
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,fp8,8191,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,float16,16383,0.012821332861979803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,float16,16383,0.02093333254257838
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,fp8,16383,0.011205332974592844
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,fp8,16383,0.01911466692884763
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,float16,32767,0.012719999998807907
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,float16,32767,0.02550933261712392
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,fp8,32767,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,fp8,32767,0.02370133250951767
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,float16,65535,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,float16,65535,0.04085866610209147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,fp8,65535,0.012997332960367203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,fp8,65535,0.032069332897663116
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,float16,1,0.008986666798591614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,float16,1,0.00891733355820179
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,fp8,1,0.009008000294367472
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,fp8,1,0.00895999992887179
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,float16,3,0.00897066667675972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,float16,3,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,float16,131071,0.013536000003417334
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,fp8,3,0.008922666932145754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,fp8,3,0.00972800018886725
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,float16,7,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,float16,7,0.009093333035707474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,fp8,7,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,float16,15,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,fp8,7,0.009754666437705358
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,fp8,15,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,fp8,131071,0.05384533107280731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,64,0,1,float16,float16,131071,0.0639519989490509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,float16,15,0.008976000050703684
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,fp8,15,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,64,128,1,float16,fp8,131071,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,float16,63,0.010608000059922537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,float16,31,0.009029333169261614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,float16,31,0.009402666861812273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,fp8,31,0.009818666925032934
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,fp8,31,0.01091733326514562
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,float16,63,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,fp8,63,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,fp8,63,0.011754666765530905
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,float16,127,0.008901333436369896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,float16,127,0.009370666618148485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,fp8,127,0.011152000476916632
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,fp8,127,0.009434666484594345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,float16,511,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,float16,255,0.010645333677530289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,float16,255,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,fp8,255,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,fp8,255,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,float16,511,0.009770666559537252
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,fp8,1023,0.01109333336353302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,fp8,511,0.010938666760921478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,fp8,511,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,float16,1023,0.010101333260536194
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,float16,1023,0.01097600037852923
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,fp8,1023,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,float16,2047,0.012853333105643591
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,float16,2047,0.013530666629473368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,fp8,2047,0.012714666624863943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,fp8,2047,0.013167999684810638
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,float16,4095,0.011887999872366587
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,float16,4095,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,fp8,4095,0.012746666868527731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,fp8,4095,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,float16,8191,0.012762666990359625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,float16,8191,0.018933333456516266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,fp8,8191,0.012773333738247553
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,fp8,8191,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,float16,16383,0.012223999947309494
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,float16,16383,0.02516799916823705
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,fp8,16383,0.012058666596810022
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,fp8,16383,0.021914665897687275
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,float16,32767,0.012842666357755661
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,float16,32767,0.04215999941031138
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,fp8,32767,0.012752000242471695
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,fp8,32767,0.031514666974544525
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,float16,65535,0.011786667009194693
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,float16,65535,0.06434666613737743
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,fp8,65535,0.012725333372751871
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,float16,1,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,fp8,65535,0.05380799869696299
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,float16,1,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,fp8,1,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,fp8,1,0.010341333225369453
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,float16,3,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,float16,3,0.009072000160813332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,fp8,3,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,fp8,3,0.009984000275532404
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,float16,7,0.010890666395425797
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,fp8,7,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,float16,7,0.008954666554927826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,float16,15,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,fp8,7,0.01009599988659223
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,float16,131071,0.11310399572054546
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,float16,15,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,fp8,131071,0.014554666976133982
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,fp8,15,0.010938666760921478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,fp8,15,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,float16,31,0.008938666433095932
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,64,128,1,float16,float16,131071,0.014943999548753103
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,float16,31,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,float16,63,0.008943999807039896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,64,0,1,float16,fp8,131071,0.09344533085823059
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,fp8,63,0.01201066623131434
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,fp8,31,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,fp8,31,0.010992000500361124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,float16,63,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,fp8,63,0.010005333150426546
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,float16,127,0.009141333401203156
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,float16,127,0.010949333508809408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,fp8,127,0.010965333630641302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,fp8,127,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,float16,255,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,float16,511,0.011055999745925268
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,float16,255,0.009712000067035357
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,fp8,255,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,fp8,255,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,float16,511,0.009839999799927076
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,fp8,511,0.010645333677530289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,fp8,1023,0.011141333729028702
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,fp8,511,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,float16,1023,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,float16,1023,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,fp8,1023,0.01098666712641716
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,float16,2047,0.013104000439246496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,float16,2047,0.014949332922697067
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,fp8,2047,0.012826666235923767
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,fp8,2047,0.015098666151364645
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,float16,4095,0.01303999995191892
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,float16,4095,0.018933333456516266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,fp8,4095,0.012837332983811697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,fp8,4095,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,float16,8191,0.012714666624863943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,float16,8191,0.023472001155217487
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,fp8,8191,0.013088000317414602
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,fp8,8191,0.02333866556485494
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,float16,16383,0.012842666357755661
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,float16,16383,0.03998400022586187
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,fp8,16383,0.012847999731699625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,fp8,16383,0.03141866624355316
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,fp8,32767,0.012810666114091873
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,float16,32767,0.0637066662311554
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,float16,32767,0.012810666114091873
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,fp8,32767,0.05411200225353241
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,float16,65535,0.012837332983811697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,float16,65535,0.11002133289972942
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,fp8,65535,0.012981332838535309
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,fp8,65535,0.09075733025868733
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,float16,1,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,fp8,1,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,float16,3,0.009098666409651438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,fp8,3,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,float16,7,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,fp8,7,0.010933333386977514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,float16,15,0.009882666791478792
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,fp8,15,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,float16,31,0.010405333091815313
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,fp8,31,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,float16,63,0.009648000200589498
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,float16,131071,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,fp8,63,0.010944000134865442
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,float16,255,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,float16,131071,0.20566399892171225
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,fp8,255,0.010640000303586325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,float16,511,0.01109333336353302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,64,0,1,float16,fp8,131071,0.1694399913152059
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,float16,127,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,64,128,1,float16,fp8,131071,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,fp8,127,0.010368000095089277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,fp8,511,0.011333333949247995
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,float16,1023,0.011055999745925268
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,fp8,1023,0.011183999478816986
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,float16,2047,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,fp8,2047,0.015194666882356008
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,float16,4095,0.01825599993268649
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,fp8,4095,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,float16,8191,0.021242665747801464
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,fp8,8191,0.01929066702723503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,float16,16383,0.03975466638803482
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,fp8,16383,0.025237334271272022
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,float16,1,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,float16,32767,0.06091199815273285
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,fp8,1,0.010901333143313726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,float16,3,0.009770666559537252
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,fp8,7,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,fp8,32767,0.040607998768488564
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,fp8,3,0.010058666889866194
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,float16,7,0.009621333330869675
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,float16,15,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,fp8,15,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,fp8,63,0.009653333574533463
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,float16,31,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,float16,65535,0.10127466917037964
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,fp8,31,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,1,128,0,1,float16,fp8,65535,0.06215466558933258
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,float16,63,0.009765333185593287
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,fp8,511,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,float16,127,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,fp8,127,0.009253333633144697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,fp8,1023,0.012837332983811697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,float16,255,0.009317333499590555
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,fp8,255,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,float16,511,0.011061333119869232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,float16,1023,0.012666666259368261
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,float16,2047,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,float16,8191,0.03940266619126002
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,fp8,2047,0.016597333053747814
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,float16,4095,0.020879998803138733
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,fp8,4095,0.018901333212852478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,fp8,8191,0.02499733368555705
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,float16,16383,0.05983466903368632
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,fp8,16383,0.03967999915281931
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,float16,32767,0.10150933265686035
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,float16,1,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,fp8,32767,0.061808000008265175
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,fp8,3,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,fp8,1,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,float16,3,0.01098666712641716
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,float16,7,0.010911999891201654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,fp8,7,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,float16,15,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,fp8,15,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,float16,31,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,float16,65535,0.1856000026067098
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,fp8,31,0.010922666639089584
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,fp8,127,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,2,128,0,1,float16,fp8,65535,0.10485866665840149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,float16,255,0.011066666493813196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,float16,63,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,fp8,63,0.01091733326514562
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,float16,127,0.01109333336353302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,fp8,255,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,float16,511,0.011125333607196808
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,fp8,511,0.01210133358836174
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,float16,1023,0.013130666067202887
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,fp8,1023,0.012944000462690989
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,float16,2047,0.021141332884629566
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,fp8,2047,0.019018666197856266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,float16,4095,0.03904533386230469
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,fp8,4095,0.023354666928450268
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,float16,8191,0.058448001742362976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,fp8,8191,0.039120001097520195
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,float16,16383,0.10087466239929199
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,fp8,16383,0.0601440022389094
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,float16,1,0.0106133334338665
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,fp8,32767,0.10201066732406616
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,fp8,1,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,float16,3,0.010608000059922537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,float16,32767,0.18339733282725015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,fp8,3,0.010202666744589806
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,float16,7,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,fp8,7,0.01002133327225844
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,float16,15,0.010565333068370819
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,fp8,15,0.010543999572594961
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,fp8,65535,0.18689600626627603
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,fp8,31,0.009770666559537252
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,float16,31,0.008789333204428354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,8,4,128,0,1,float16,float16,65535,0.34904531637827557
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,float16,63,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,fp8,63,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,float16,127,0.010944000134865442
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,fp8,127,0.010549332946538925
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,float16,255,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,fp8,255,0.009701333319147428
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,float16,511,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,fp8,511,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,fp8,4095,0.011557333171367645
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,float16,1023,0.011125333607196808
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,fp8,1023,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,float16,2047,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,fp8,2047,0.01109333336353302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,float16,4095,0.012805332740147909
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,float16,8191,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,fp8,8191,0.015279999623696009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,fp8,32767,0.0281333327293396
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,float16,16383,0.019440000255902607
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,fp8,16383,0.01926400015751521
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,float16,32767,0.02749866743882497
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,float16,1,0.009648000200589498
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,float16,3,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,fp8,1,0.008949333180983862
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,fp8,65535,0.03151999910672506
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,float16,65535,0.03140799949566523
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,fp8,3,0.009050666665037474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,float16,7,0.009904000287254652
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,fp8,7,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,float16,15,0.009109333157539368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,fp8,15,0.010474666953086853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,float16,31,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,float16,127,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,fp8,31,0.009098666409651438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,fp8,131071,0.035349334279696144
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,1,128,0,1,float16,float16,131071,0.03558400024970373
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,float16,63,0.010117333382368088
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,fp8,63,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,fp8,127,0.01003200002014637
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,float16,255,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,fp8,255,0.008837333569924036
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,float16,511,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,fp8,511,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,float16,4095,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,float16,1023,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,fp8,1023,0.010928000013033548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,float16,2047,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,fp8,2047,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,fp8,4095,0.010890666395425797
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,float16,8191,0.015941333025693893
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,fp8,8191,0.015157333264748255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,float16,16383,0.02059200033545494
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,fp8,16383,0.01899733394384384
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,float16,32767,0.0230880007147789
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,fp8,32767,0.021322667598724365
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,float16,1,0.009045333291093508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,float16,65535,0.025487999121348064
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,fp8,1,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,fp8,65535,0.025301332275072735
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,float16,3,0.008965333302815756
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,fp8,3,0.008986666798591614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,float16,7,0.009093333035707474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,fp8,7,0.010410666465759277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,float16,15,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,fp8,15,0.009018666421373686
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,float16,31,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,float16,131071,0.02959466725587845
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,fp8,31,0.00921066664159298
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,2,128,0,1,float16,fp8,131071,0.02741333345572154
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,float16,63,0.00972800018886725
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,fp8,63,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,float16,511,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,float16,127,0.010629333555698395
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,float16,1023,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,fp8,127,0.008933333059151968
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,float16,255,0.009109333157539368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,fp8,255,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,fp8,511,0.01090666651725769
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,fp8,1023,0.01098666712641716
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,float16,2047,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,fp8,2047,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,float16,4095,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,fp8,4095,0.010965333630641302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,float16,8191,0.016832000265518825
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,fp8,8191,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,float16,16383,0.01897066707412402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,fp8,16383,0.017290666699409485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,float16,32767,0.021242665747801464
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,fp8,32767,0.020256000260512035
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,float16,1,0.009749333063761393
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,fp8,1,0.00892800030608972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,float16,65535,0.02334933231274287
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,fp8,65535,0.021317332983016968
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,float16,3,0.010901333143313726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,fp8,3,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,float16,7,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,fp8,7,0.008986666798591614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,float16,15,0.010058666889866194
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,float16,131071,0.043141335248947144
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,fp8,15,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,float16,31,0.009119999905427298
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,8,4,128,0,1,float16,fp8,131071,0.027888000011444092
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,fp8,31,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,float16,63,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,fp8,63,0.010949333508809408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,float16,127,0.009679999823371569
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,fp8,127,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,float16,255,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,fp8,255,0.008938666433095932
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,float16,511,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,fp8,511,0.011061333119869232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,float16,1023,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,fp8,1023,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,float16,2047,0.01240533341964086
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,fp8,2047,0.011237333218256632
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,float16,4095,0.012784000486135483
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,fp8,4095,0.012655999511480331
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,float16,8191,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,fp8,8191,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,float16,16383,0.021066665649414062
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,fp8,16383,0.020938667158285778
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,float16,32767,0.02369600037733714
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,fp8,32767,0.0230880007147789
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,float16,1,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,float16,65535,0.02714666724205017
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,fp8,1,0.010581333190202713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,fp8,65535,0.023562667270501454
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,float16,3,0.00915733352303505
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,fp8,3,0.010426666587591171
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,float16,7,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,fp8,7,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,float16,15,0.01022933361430963
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,fp8,15,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,fp8,131071,0.027765333652496338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,1,128,0,1,float16,float16,131071,0.03138133386770884
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,float16,31,0.009002666920423508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,fp8,31,0.010650667051474253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,float16,63,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,fp8,63,0.01097600037852923
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,float16,127,0.010090666512648264
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,fp8,127,0.00901333304742972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,float16,255,0.01009599988659223
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,fp8,255,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,float16,2047,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,float16,511,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,float16,4095,0.012757333616415659
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,fp8,511,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,float16,1023,0.011066666493813196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,fp8,1023,0.010938666760921478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,fp8,2047,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,fp8,4095,0.011333333949247995
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,float16,8191,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,fp8,8191,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,float16,16383,0.01894933357834816
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,fp8,16383,0.0191040001809597
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,float16,32767,0.021920000513394673
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,fp8,32767,0.019893333315849304
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,float16,1,0.009653333574533463
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,float16,65535,0.02502399931351344
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,fp8,1,0.009018666421373686
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,float16,3,0.010010666524370512
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,fp8,65535,0.022511998812357586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,fp8,3,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,float16,7,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,fp8,7,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,float16,15,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,fp8,15,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,float16,31,0.010138666878143946
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,float16,131071,0.043824002146720886
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,fp8,31,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,float16,63,0.008976000050703684
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,float16,255,0.008938666433095932
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,2,128,0,1,float16,fp8,131071,0.028410665690898895
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,fp8,63,0.010949333508809408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,float16,127,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,fp8,127,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,fp8,255,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,float16,511,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,fp8,511,0.01108266661564509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,float16,1023,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,fp8,1023,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,float16,2047,0.012709333250919977
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,fp8,2047,0.011445333560307821
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,float16,4095,0.01515199989080429
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,fp8,4095,0.014837333311637243
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,float16,8191,0.01681600014368693
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,fp8,8191,0.01685333376129468
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,float16,16383,0.019354666272799175
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,fp8,16383,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,float16,32767,0.02369600037733714
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,fp8,32767,0.02096533278624217
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,fp8,1,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,float16,3,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,float16,1,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,fp8,3,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,float16,65535,0.0403466671705246
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,fp8,65535,0.025349333882331848
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,float16,7,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,fp8,7,0.0106133334338665
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,float16,15,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,fp8,15,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,float16,31,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,float16,131071,0.061994666854540505
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,fp8,127,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,fp8,31,0.010938666760921478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,8,4,128,0,1,float16,fp8,131071,0.04155733436346054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,float16,63,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,fp8,63,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,float16,127,0.010944000134865442
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,float16,255,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,fp8,255,0.009930666536092758
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,float16,511,0.011141333729028702
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,fp8,511,0.011055999745925268
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,float16,4095,0.02107200026512146
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,float16,1023,0.012965332716703415
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,fp8,1023,0.012847999731699625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,float16,2047,0.0170666662355264
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,fp8,2047,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,fp8,4095,0.018944000204404194
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,float16,8191,0.0395413339138031
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,fp8,16383,0.039936001102129616
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,float16,16383,0.06071466704209646
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,fp8,8191,0.02498133232196172
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,float16,1,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,fp8,1,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,float16,3,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,fp8,3,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,fp8,32767,0.062128002444903054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,float16,7,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,fp8,31,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,fp8,7,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,float16,15,0.010890666395425797
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,float16,127,0.010954666882753372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,fp8,15,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,1,128,0,1,float16,float16,32767,0.10196266571680705
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,float16,31,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,float16,63,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,fp8,63,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,fp8,127,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,float16,255,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,fp8,255,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,float16,511,0.012714666624863943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,fp8,2047,0.017946666727463405
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,float16,1023,0.012863999853531519
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,fp8,511,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,fp8,4095,0.023978665471076965
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,fp8,1023,0.012879999975363413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,float16,8191,0.05951466659704844
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,float16,2047,0.019215999792019527
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,float16,4095,0.03807999938726425
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,fp8,8191,0.03842666745185852
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,float16,1,0.012373333175977072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,float16,16383,0.1006719966729482
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,fp8,1,0.01302933320403099
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,fp8,16383,0.06047999858856201
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,float16,7,0.011493333925803503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,float16,3,0.011120000233252844
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,fp8,3,0.011130666981140772
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,fp8,7,0.012789333860079447
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,float16,32767,0.18470933039983115
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,float16,15,0.01137599969903628
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,float16,63,0.012826666235923767
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,fp8,15,0.011018666128317514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,2,128,0,1,float16,fp8,32767,0.10257066289583842
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,float16,31,0.011071999867757162
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,fp8,31,0.012752000242471695
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,fp8,63,0.011285333583752314
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,float16,127,0.013007999708255133
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,fp8,127,0.012826666235923767
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,float16,255,0.012661332885424295
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,fp8,255,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,float16,511,0.014181333283583323
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,fp8,511,0.013914667069911957
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,float16,1023,0.019167999426523846
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,fp8,1023,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,float16,2047,0.039594667653242745
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,fp8,2047,0.02499733368555705
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,float16,4095,0.060234665870666504
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,fp8,4095,0.03978666663169861
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,float16,8191,0.10242666800816853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,fp8,8191,0.06216000020503998
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,float16,1,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,fp8,1,0.009029333169261614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,float16,16383,0.18506133556365967
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,fp8,16383,0.10418132940928142
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,float16,3,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,float16,15,0.01101333275437355
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,fp8,3,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,float16,7,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,fp8,7,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,float16,32767,0.35202133655548096
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,float16,31,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,fp8,15,0.008954666554927826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,8,4,128,0,1,float16,fp8,32767,0.18897066513697305
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,fp8,31,0.010005333150426546
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,float16,63,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,fp8,63,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,float16,127,0.010656000425418219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,fp8,127,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,float16,255,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,fp8,255,0.008954666554927826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,float16,511,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,fp8,511,0.010602666685978571
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,float16,1023,0.01098666712641716
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,float16,8191,0.017504000415404636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,fp8,1023,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,float16,2047,0.012826666235923767
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,fp8,2047,0.013082666943470636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,float16,4095,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,fp8,4095,0.013125333935022354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,fp8,8191,0.017114666601022083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,float16,16383,0.021173333128293354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,fp8,16383,0.019978666057189304
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,float16,32767,0.023050665855407715
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,fp8,32767,0.021359999974568684
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,float16,65535,0.02644266684850057
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,float16,1,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,fp8,1,0.010058666889866194
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,fp8,65535,0.023418667415777843
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,float16,3,0.010890666395425797
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,fp8,3,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,float16,7,0.009733333562811216
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,fp8,7,0.009317333499590555
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,float16,15,0.00979200005531311
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,fp8,15,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,float16,31,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,fp8,31,0.010656000425418219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,float16,63,0.009599999835093817
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,fp8,63,0.009743999689817429
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,float16,131071,0.04552533229192098
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,float16,127,0.00984533317387104
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,1,128,0,1,float16,fp8,131071,0.029487999776999157
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,fp8,127,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,float16,255,0.009253333633144697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,fp8,255,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,float16,511,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,fp8,511,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,float16,1023,0.010992000500361124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,fp8,1023,0.010954666882753372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,float16,2047,0.013082666943470636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,fp8,2047,0.013050666699806849
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,float16,4095,0.01540800059835116
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,fp8,4095,0.014981333166360855
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,float16,8191,0.018954666952292126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,fp8,8191,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,float16,16383,0.021327999730904896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,fp8,16383,0.019237333287795384
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,float16,32767,0.025114665428797405
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,fp8,32767,0.021146667500336964
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,float16,65535,0.04378133515516917
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,float16,1,0.010597333312034607
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,fp8,1,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,float16,3,0.009039999917149544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,fp8,3,0.009194666519761086
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,fp8,65535,0.02717333287000656
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,float16,7,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,fp8,7,0.010954666882753372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,float16,15,0.008863999818762144
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,fp8,15,0.009093333035707474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,float16,31,0.009733333562811216
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,fp8,31,0.008853333070874214
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,float16,63,0.0106133334338665
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,fp8,63,0.009392000113924345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,float16,131071,0.06429333488146464
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,float16,127,0.009130666653315226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,2,128,0,1,float16,fp8,131071,0.045663997530937195
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,fp8,127,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,float16,255,0.00898133342464765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,fp8,255,0.010512000570694605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,float16,511,0.010928000013033548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,fp8,511,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,float16,4095,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,float16,1023,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,float16,8191,0.01912533367673556
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,fp8,1023,0.010954666882753372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,float16,2047,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,fp8,2047,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,fp8,4095,0.015669333438078564
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,float16,32767,0.04154666761557261
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,fp8,8191,0.018874666343132656
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,float16,16383,0.023050665855407715
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,fp8,16383,0.01972266659140587
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,float16,1,0.011109333485364914
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,fp8,32767,0.025248001019159954
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,fp8,1,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,fp8,65535,0.04112533231576284
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,float16,65535,0.060165335734685264
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,float16,3,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,fp8,3,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,float16,7,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,fp8,7,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,float16,131071,0.09993599851926167
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,float16,15,0.010602666685978571
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,fp8,15,0.011168000598748526
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,float16,31,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,fp8,31,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,float16,63,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,8,4,128,0,1,float16,fp8,131071,0.060959999759991966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,fp8,63,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,float16,127,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,fp8,127,0.010656000425418219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,float16,1023,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,float16,255,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,fp8,255,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,float16,511,0.012831999609867731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,fp8,511,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,fp8,1023,0.01309866706530253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,float16,2047,0.021007999777793884
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,fp8,2047,0.018874666343132656
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,float16,4095,0.038736000657081604
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,fp8,4095,0.025050667424996693
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,float16,8191,0.06050133208433787
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,float16,1,0.012815999488035837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,fp8,8191,0.03963200002908707
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,fp8,1,0.010944000134865442
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,fp8,16383,0.05983999868233999
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,float16,3,0.012106666962305704
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,fp8,3,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,1,128,0,1,float16,float16,16383,0.10190932949384053
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,float16,7,0.012847999731699625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,fp8,7,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,float16,15,0.012805332740147909
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,fp8,15,0.01098666712641716
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,fp8,31,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,float16,31,0.012746666868527731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,float16,63,0.01246400053302447
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,fp8,63,0.011525332927703857
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,float16,127,0.012730666746695837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,fp8,127,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,float16,255,0.012869333227475485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,fp8,255,0.011120000233252844
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,float16,2047,0.03931200007597605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,float16,511,0.014741333822409311
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,fp8,511,0.013839999834696451
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,float16,1023,0.017637333522240322
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,fp8,4095,0.041082667807737984
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,fp8,1023,0.015919999529918034
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,float16,8191,0.10263466835021973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,fp8,2047,0.02515733242034912
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,float16,4095,0.06010133524735769
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,fp8,8191,0.06187733511130015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,float16,1,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,fp8,1,0.013056000073750814
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,float16,3,0.014965333044528961
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,fp8,3,0.013189333180586496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,float16,16383,0.1858666737874349
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,float16,7,0.014901333798964819
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,2,128,0,1,float16,fp8,16383,0.1048426628112793
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,fp8,7,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,float16,15,0.014949332922697067
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,fp8,15,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,fp8,63,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,float16,127,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,float16,31,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,fp8,31,0.014826666563749313
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,float16,63,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,fp8,127,0.013882666826248169
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,float16,255,0.014805333067973455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,fp8,255,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,float16,511,0.01916266605257988
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,fp8,511,0.017024000485738117
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,float16,1023,0.03489066660404205
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,fp8,1023,0.02298133323589961
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,float16,2047,0.06018666426340739
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,fp8,2047,0.04010133445262909
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,float16,4095,0.10283733407656352
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,fp8,4095,0.06226666768391927
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,float16,8191,0.18622400363286337
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,float16,1,0.012693333129088083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,fp8,8191,0.10525866349538167
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,fp8,1,0.012800000607967377
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,float16,3,0.012901333471139273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,float16,7,0.012426666915416718
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,fp8,3,0.013013333082199097
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,fp8,7,0.012741333494583765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,fp8,16383,0.18996800978978476
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,8,4,128,0,1,float16,float16,16383,0.35500800609588623
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,float16,15,0.012917333592971167
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,fp8,63,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,fp8,15,0.012837332983811697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,float16,31,0.012645332763592402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,fp8,31,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,float16,63,0.01310933381319046
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,float16,127,0.012917333592971167
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,fp8,127,0.012863999853531519
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,float16,255,0.012762666990359625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,fp8,255,0.01108266661564509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,float16,511,0.014949332922697067
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,fp8,511,0.013088000317414602
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,float16,1023,0.017829333742459614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,fp8,1023,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,float16,2047,0.03967999915281931
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,fp8,2047,0.02510400116443634
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,float16,1,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,float16,4095,0.06187200049559275
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,fp8,1,0.013077333569526672
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,1,128,0,1,float16,fp8,4095,0.04110399881998698
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,float16,3,0.014720000326633453
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,fp8,3,0.013114667187134424
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,float16,7,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,fp8,7,0.014949332922697067
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,float16,15,0.014970666418472925
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,fp8,15,0.014736000448465347
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,float16,31,0.015002666662136713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,fp8,31,0.013445333888133367
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,float16,63,0.014981333166360855
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,fp8,63,0.013503999759753546
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,float16,127,0.014864000181357065
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,fp8,127,0.014970666418472925
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,float16,255,0.01492799942692121
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,fp8,255,0.014666666587193808
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,float16,511,0.01899733394384384
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,float16,2047,0.062090665102005005
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,fp8,511,0.01722666621208191
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,float16,1023,0.03359466542800268
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,fp8,1023,0.023018665611743927
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,fp8,2047,0.04247466723124186
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,float16,1,0.019194666296243668
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,fp8,3,0.01725333308180173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,float16,4095,0.10291733344395955
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,fp8,1,0.01730666682124138
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,2,128,0,1,float16,fp8,4095,0.06408533453941345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,float16,3,0.018842666099468868
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,float16,7,0.019199999670187633
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,fp8,7,0.01899733394384384
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,float16,15,0.01922133316596349
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,fp8,15,0.018960000326236088
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,float16,31,0.01926400015751521
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,fp8,31,0.01701333373785019
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,float16,63,0.01915733392039935
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,fp8,63,0.017349333812793095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,float16,127,0.01932799940307935
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,fp8,127,0.01830400029818217
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,float16,255,0.01923199991385142
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,fp8,255,0.01854933301607768
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,float16,511,0.033344000577926636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,fp8,511,0.02306666721900304
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,float16,1023,0.05268266797065735
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,fp8,1023,0.035546667873859406
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,float16,2047,0.09628799557685852
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,float16,4095,0.1734453241030375
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,128,0,1,float16,float16,1,0.014991999914248785
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,fp8,2047,0.059877331058184304
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,8,4,128,0,1,float16,fp8,4095,0.09678933024406433
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,128,0,1,float16,fp8,1,0.013962666193644205
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,128,0,1,float16,float16,3,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,128,0,1,float16,fp8,3,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,128,0,1,float16,float16,7,0.015189333508412043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,128,0,1,float16,fp8,15,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,128,0,1,float16,fp8,7,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,128,0,1,float16,float16,15,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,128,0,1,float16,float16,31,0.014901333798964819
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,128,0,1,float16,fp8,31,0.014053333550691605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,128,0,1,float16,float16,63,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,128,0,1,float16,fp8,63,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,128,0,1,float16,float16,127,0.015034666905800501
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,128,0,1,float16,fp8,127,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,128,0,1,float16,float16,511,0.01922133316596349
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,128,0,1,float16,float16,255,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,128,0,1,float16,fp8,255,0.014607999473810196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,128,0,1,float16,fp8,511,0.01791999985774358
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,128,0,1,float16,float16,1023,0.034917332231998444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,128,0,1,float16,fp8,1023,0.023221333821614582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,128,0,1,float16,float16,1,0.01933866615096728
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,128,0,1,float16,fp8,1,0.017279999951521557
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,128,0,1,float16,float16,2047,0.060864001512527466
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,1,128,0,1,float16,fp8,2047,0.041573333243529
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,128,0,1,float16,float16,3,0.021040000021457672
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,128,0,1,float16,fp8,3,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,128,0,1,float16,float16,7,0.020202666521072388
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,128,0,1,float16,fp8,7,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,128,0,1,float16,float16,15,0.01930133377512296
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,128,0,1,float16,fp8,15,0.019066666563351948
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,128,0,1,float16,fp8,63,0.018885333091020584
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,128,0,1,float16,float16,31,0.019002666076024372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,128,0,1,float16,fp8,31,0.01905599981546402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,128,0,1,float16,float16,63,0.019002666076024372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,128,0,1,float16,float16,127,0.01934933289885521
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,128,0,1,float16,fp8,127,0.01706133286158244
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,128,0,1,float16,fp8,511,0.023631999890009563
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,128,0,1,float16,float16,255,0.01933866615096728
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,128,0,1,float16,fp8,255,0.01904533306757609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,128,0,1,float16,fp8,1023,0.035589332381884255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,128,0,1,float16,float16,511,0.03316800047953924
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,128,0,1,float16,float16,1023,0.05401599903901418
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,128,0,1,float16,float16,1,0.02940800040960312
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,128,0,1,float16,fp8,1,0.0271519993742307
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,128,0,1,float16,float16,2047,0.09849599997202556
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,2,128,0,1,float16,fp8,2047,0.0591839998960495
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,128,0,1,float16,fp8,7,0.027114666998386383
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,128,0,1,float16,float16,3,0.029578665892283123
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,128,0,1,float16,fp8,3,0.026949333647886913
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,128,0,1,float16,float16,7,0.029120000700155895
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,128,0,1,float16,float16,63,0.02975466599067052
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,128,0,1,float16,float16,15,0.02951466788848241
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,128,0,1,float16,fp8,63,0.027248000105222065
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,128,0,1,float16,fp8,15,0.02699733277161916
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,128,0,1,float16,float16,31,0.029546665648619335
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,128,0,1,float16,fp8,31,0.025487999121348064
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,128,0,1,float16,float16,127,0.029370665550231934
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,128,0,1,float16,fp8,127,0.025429333249727886
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,128,0,1,float16,float16,255,0.03359466542800268
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,128,0,1,float16,fp8,255,0.0271573339899381
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,128,0,1,float16,float16,511,0.054799998799959816
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,128,0,1,float16,fp8,511,0.03940266619126002
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,128,0,1,float16,float16,1023,0.09293333689371745
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,float16,1,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,128,0,1,float16,fp8,1023,0.05780800183614095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,fp8,1,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,float16,3,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,128,0,1,float16,float16,2047,0.1746079921722412
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,fp8,3,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,8,4,128,0,1,float16,fp8,2047,0.10048533479372661
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,float16,7,0.010885333021481832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,fp8,7,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,float16,15,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,fp8,15,0.00895999992887179
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,float16,31,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,fp8,31,0.011055999745925268
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,float16,63,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,fp8,63,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,float16,127,0.009679999823371569
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,fp8,127,0.010138666878143946
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,float16,255,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,fp8,255,0.008874666566650072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,fp8,2047,0.01302933320403099
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,float16,511,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,fp8,511,0.010981333752473196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,float16,1023,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,fp8,1023,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,float16,2047,0.012800000607967377
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,float16,4095,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,fp8,4095,0.015077333897352219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,float16,8191,0.01897066707412402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,fp8,8191,0.01727466657757759
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,float16,16383,0.02125866711139679
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,fp8,16383,0.01897066707412402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,float16,32767,0.02516799916823705
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,fp8,32767,0.02128533273935318
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,float16,1,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,fp8,1,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,float16,3,0.010650667051474253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,fp8,3,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,float16,65535,0.04222933451334635
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,fp8,65535,0.027130665878454845
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,float16,7,0.009018666421373686
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,fp8,15,0.01091733326514562
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,fp8,7,0.00956266683836778
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,float16,15,0.008869333192706108
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,float16,31,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,fp8,31,0.00943999985853831
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,float16,63,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,fp8,63,0.009599999835093817
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,fp8,131071,0.043807998299598694
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,float16,127,0.009103999783595404
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,1,128,0,1,float16,float16,131071,0.06434133152167003
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,fp8,127,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,float16,255,0.008922666932145754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,fp8,255,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,float16,511,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,fp8,2047,0.014826666563749313
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,fp8,511,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,fp8,4095,0.01624533285697301
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,float16,1023,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,fp8,8191,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,fp8,1023,0.010928000013033548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,float16,16383,0.023311999936898548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,fp8,16383,0.020879998803138733
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,float16,2047,0.015008000036080679
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,float16,4095,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,float16,8191,0.01922133316596349
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,float16,32767,0.041802664597829185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,float16,1,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,fp8,32767,0.025381334125995636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,fp8,1,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,float16,3,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,fp8,65535,0.03995733211437861
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,fp8,3,0.009477333476146063
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,float16,65535,0.0599839985370636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,float16,7,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,fp8,7,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,float16,15,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,fp8,15,0.010656000425418219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,float16,31,0.009082666908701261
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,fp8,31,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,fp8,127,0.010533332824707031
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,float16,63,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,fp8,255,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,fp8,63,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,float16,127,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,float16,131071,0.10101866722106934
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,float16,255,0.00890666681031386
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,2,128,0,1,float16,fp8,131071,0.06206400195757548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,float16,511,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,fp8,511,0.011183999478816986
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,float16,1023,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,fp8,4095,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,float16,8191,0.02096533278624217
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,fp8,1023,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,float16,2047,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,fp8,2047,0.014773332824309668
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,float16,4095,0.01727466657757759
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,float16,32767,0.058746665716171265
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,fp8,8191,0.018960000326236088
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,float16,16383,0.03852800031503042
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,fp8,16383,0.025029333929220837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,fp8,32767,0.039690665900707245
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,float16,65535,0.10110933581988017
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,128,0,1,float16,float16,1,0.021013334393501282
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,fp8,65535,0.060559997955958046
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,128,0,1,float16,fp8,1,0.019237333287795384
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,128,0,1,float16,float16,3,0.021216000119845074
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,128,0,1,float16,fp8,3,0.01904533306757609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,128,0,1,float16,fp8,7,0.019280000279347103
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,128,0,1,float16,float16,7,0.021146667500336964
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,128,0,1,float16,float16,15,0.020954666038354237
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,128,0,1,float16,fp8,15,0.019141333798567455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,128,0,1,float16,float16,31,0.021322667598724365
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,fp8,131071,0.10712533195813496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,128,0,1,float16,fp8,31,0.019285333653291065
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,8,4,128,0,1,float16,float16,131071,0.18682666619618735
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,128,0,1,float16,float16,63,0.02126399924357732
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,128,0,1,float16,fp8,63,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,128,0,1,float16,float16,127,0.020960000654061634
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,128,0,1,float16,fp8,127,0.019189332922299702
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,128,0,1,float16,float16,255,0.020992000897725422
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,128,0,1,float16,fp8,255,0.019274666905403137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,128,0,1,float16,float16,511,0.03505599995454153
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,128,0,1,float16,fp8,511,0.025125332176685333
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,128,0,1,float16,float16,1,0.031146667897701263
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,128,0,1,float16,float16,1023,0.05500799914201101
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,128,0,1,float16,fp8,1,0.02717866748571396
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,128,0,1,float16,fp8,3,0.027082666754722595
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,128,0,1,float16,float16,3,0.029301332930723827
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,1,128,0,1,float16,fp8,1023,0.03624533365170161
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,128,0,1,float16,float16,7,0.0295413335164388
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,128,0,1,float16,fp8,7,0.027301333844661713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,128,0,1,float16,float16,15,0.03126933425664902
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,128,0,1,float16,fp8,15,0.027077332139015198
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,128,0,1,float16,float16,31,0.029546665648619335
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,128,0,1,float16,fp8,31,0.02734400083621343
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,128,0,1,float16,float16,63,0.029445332785447437
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,128,0,1,float16,fp8,63,0.02734400083621343
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,128,0,1,float16,float16,127,0.029850666721661884
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,128,0,1,float16,fp8,127,0.027002667387326557
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,128,0,1,float16,float16,255,0.034517332911491394
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,128,0,1,float16,fp8,255,0.027141332626342773
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,128,0,1,float16,float16,511,0.05579199890295664
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,128,0,1,float16,fp8,511,0.039690665900707245
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,128,0,1,float16,float16,1,0.04981866478919983
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,128,0,1,float16,float16,1023,0.0944106678167979
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,128,0,1,float16,fp8,1,0.04349866509437561
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,128,0,1,float16,float16,3,0.04974400003751119
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,2,128,0,1,float16,fp8,1023,0.058693334460258484
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,128,0,1,float16,fp8,3,0.04363200068473816
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,128,0,1,float16,float16,7,0.04974400003751119
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,128,0,1,float16,fp8,7,0.04353066782156626
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,128,0,1,float16,float16,15,0.049642667174339294
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,128,0,1,float16,fp8,15,0.04354133208592733
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,128,0,1,float16,float16,31,0.04976533353328705
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,128,0,1,float16,fp8,31,0.043023998538653054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,128,0,1,float16,float16,63,0.049770668148994446
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,128,0,1,float16,fp8,63,0.04347200194994608
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,128,0,1,float16,float16,127,0.0499893327554067
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,128,0,1,float16,fp8,127,0.043466667334238686
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,128,0,1,float16,float16,255,0.05474133292833964
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,128,0,1,float16,fp8,255,0.04568000137805939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,128,0,1,float16,float16,511,0.09564266602198283
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,128,0,1,float16,fp8,511,0.06644266843795776
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,128,0,1,float16,float16,1023,0.17214399576187134
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,8,4,128,0,1,float16,fp8,1023,0.10288000106811523
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,128,0,1,float16,float16,1,0.03329599897066752
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,128,0,1,float16,fp8,1,0.029157333076000214
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,128,0,1,float16,float16,3,0.03323200096686681
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,128,0,1,float16,fp8,3,0.02908266584078471
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,128,0,1,float16,float16,7,0.03160000095764796
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,128,0,1,float16,fp8,7,0.029520000020662945
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,128,0,1,float16,float16,15,0.03164800008138021
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,128,0,1,float16,fp8,15,0.029285334050655365
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,128,0,1,float16,float16,31,0.03330666571855545
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,128,0,1,float16,fp8,31,0.029285334050655365
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,128,0,1,float16,float16,63,0.033359999457995095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,128,0,1,float16,fp8,63,0.02934933453798294
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,128,0,1,float16,float16,127,0.03140799949566523
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,128,0,1,float16,fp8,127,0.029232000311215717
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,128,0,1,float16,float16,255,0.037263999382654824
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,128,0,1,float16,fp8,255,0.029264000554879505
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,128,0,1,float16,float16,511,0.058037335673967995
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,128,0,1,float16,float16,1,0.051962668697039284
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,128,0,1,float16,fp8,1,0.04398400088151296
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,1,128,0,1,float16,fp8,511,0.0415786678592364
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,128,0,1,float16,float16,3,0.05166399975617727
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,128,0,1,float16,fp8,3,0.04568533102671305
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,128,0,1,float16,float16,7,0.0517493337392807
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,128,0,1,float16,fp8,7,0.04418133199214935
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,128,0,1,float16,float16,15,0.0517493337392807
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,128,0,1,float16,fp8,15,0.04568000137805939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,128,0,1,float16,float16,31,0.0517439991235733
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,128,0,1,float16,fp8,31,0.04387733340263367
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,128,0,1,float16,float16,63,0.05167999863624573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,128,0,1,float16,fp8,63,0.04557333389918009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,128,0,1,float16,float16,127,0.0539626677831014
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,128,0,1,float16,fp8,127,0.04568000137805939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,128,0,1,float16,float16,255,0.0581226646900177
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,128,0,1,float16,fp8,255,0.04725866516431173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,128,0,1,float16,float16,1,0.09077866872151692
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,128,0,1,float16,fp8,1,0.0783679982026418
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,128,0,1,float16,float16,511,0.09802666306495667
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,2,128,0,1,float16,fp8,511,0.06807999809583028
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,128,0,1,float16,float16,3,0.09171733260154724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,128,0,1,float16,fp8,3,0.07863466441631317
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,128,0,1,float16,float16,7,0.09195733070373535
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,128,0,1,float16,fp8,7,0.07745066781838734
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,128,0,1,float16,float16,15,0.09131733576456706
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,128,0,1,float16,fp8,15,0.07805866499741872
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,128,0,1,float16,float16,31,0.09071999788284302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,128,0,1,float16,fp8,31,0.0783786674340566
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,128,0,1,float16,float16,63,0.09186666210492452
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,128,0,1,float16,fp8,63,0.07849599917729695
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,float16,1,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,fp8,1,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,128,0,1,float16,float16,127,0.09109333157539368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,float16,3,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,fp8,3,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,float16,7,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,fp8,7,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,float16,15,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,128,0,1,float16,fp8,127,0.07891733447710673
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,fp8,15,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,128,0,1,float16,float16,255,0.09779199957847595
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,128,0,1,float16,fp8,255,0.07969599962234497
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,float16,31,0.008954666554927826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,fp8,31,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,128,0,1,float16,float16,511,0.17880000670750937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,8,4,128,0,1,float16,fp8,511,0.12158399820327759
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,float16,63,0.009775999933481216
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,fp8,63,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,float16,127,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,fp8,127,0.00949866697192192
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,float16,255,0.009136000027259191
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,float16,2047,0.015087999403476715
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,fp8,255,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,float16,511,0.01089599976936976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,fp8,511,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,fp8,4095,0.016917333006858826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,float16,1023,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,fp8,1023,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,fp8,2047,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,float16,4095,0.01724799970785777
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,float16,8191,0.01930133377512296
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,fp8,8191,0.018944000204404194
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,float16,16383,0.02342933416366577
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,float16,32767,0.04111466556787491
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,fp8,16383,0.020975999534130096
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,fp8,32767,0.025263999899228413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,float16,1,0.010485333700974783
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,fp8,1,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,float16,3,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,fp8,3,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,float16,7,0.009712000067035357
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,float16,65535,0.06085866689682007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,fp8,7,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,float16,15,0.010064000263810158
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,fp8,65535,0.04161066561937332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,fp8,15,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,float16,31,0.00972800018886725
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,fp8,31,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,float16,63,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,fp8,63,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,float16,127,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,fp8,127,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,float16,255,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,fp8,255,0.010650667051474253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,float16,511,0.011061333119869232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,fp8,131071,0.06339199841022491
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,fp8,511,0.011077333241701126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,1,128,0,1,float16,float16,131071,0.10311466455459595
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,float16,1023,0.011648000528415045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,fp8,1023,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,float16,2047,0.015184000134468079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,fp8,2047,0.015072000523408255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,float16,8191,0.021274665991465252
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,float16,4095,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,float16,16383,0.03774400055408478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,fp8,16383,0.02513599892457326
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,fp8,4095,0.016165333489576977
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,fp8,8191,0.018922666708628338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,float16,32767,0.05991999804973602
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,fp8,32767,0.03939733405907949
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,float16,1,0.00983466642598311
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,fp8,1,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,float16,3,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,fp8,3,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,float16,7,0.010197333370645842
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,float16,15,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,fp8,15,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,fp8,7,0.010522666076819101
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,fp8,65535,0.06164266665776571
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,float16,65535,0.10219200452168782
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,float16,31,0.010992000500361124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,float16,63,0.010634666929642359
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,fp8,31,0.011855999628702799
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,float16,255,0.009066666786869368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,fp8,63,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,float16,127,0.00997866690158844
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,fp8,127,0.011343999455372492
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,fp8,255,0.01097600037852923
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,float16,131071,0.18763200441996256
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,float16,511,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,fp8,2047,0.016837333639462788
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,fp8,511,0.011087999989589056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,float16,1023,0.012506666282812754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,2,128,0,1,float16,fp8,131071,0.1070186694463094
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,fp8,1023,0.011264000087976456
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,float16,2047,0.017573333034912746
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,float16,4095,0.02035733312368393
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,fp8,4095,0.01905599981546402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,float16,8191,0.038005332152048744
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,fp8,8191,0.02510933329661687
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,float16,16383,0.059024001161257424
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,fp8,16383,0.039408000806967415
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,float16,32767,0.10113599896430969
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,fp8,32767,0.062463998794555664
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,float16,1,0.00897066667675972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,float16,1,0.009039999917149544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,fp8,1,0.009525333220760027
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,float16,65535,0.1846239964167277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,float16,3,0.009114666531483332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,fp8,65535,0.10486933588981628
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,fp8,3,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,fp8,1,0.01101333275437355
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,float16,3,0.010645333677530289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,fp8,7,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,fp8,3,0.00943999985853831
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,float16,7,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,float16,7,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,fp8,7,0.010629333555698395
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,float16,15,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,float16,15,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,fp8,15,0.008986666798591614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,fp8,15,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,fp8,131071,0.1937333345413208
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,float16,31,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,8,4,128,0,1,float16,float16,131071,0.3534826834996541
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,float16,31,0.009008000294367472
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,fp8,31,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,fp8,31,0.01089599976936976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,fp8,63,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,float16,127,0.009103999783595404
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,float16,63,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,float16,63,0.009114666531483332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,fp8,63,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,float16,127,0.010608000059922537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,fp8,127,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,fp8,127,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,float16,255,0.009349333122372627
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,float16,255,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,fp8,255,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,fp8,255,0.009114666531483332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,float16,511,0.0106133334338665
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,float16,511,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,fp8,1023,0.010144000252087912
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,fp8,511,0.010911999891201654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,fp8,511,0.010928000013033548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,float16,1023,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,float16,1023,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,fp8,1023,0.011087999989589056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,float16,2047,0.012896000097195307
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,float16,2047,0.014192000031471252
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,fp8,2047,0.01331199953953425
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,fp8,2047,0.013738666971524557
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,float16,4095,0.012794667234023413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,float16,4095,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,fp8,4095,0.012821332861979803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,fp8,8191,0.012901333471139273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,fp8,4095,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,float16,8191,0.012901333471139273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,float16,8191,0.01911466692884763
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,fp8,8191,0.019002666076024372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,fp8,16383,0.0227360005180041
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,float16,16383,0.024559999505678814
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,float16,16383,0.013141332815090815
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,fp8,16383,0.013199999928474426
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,float16,32767,0.012837332983811697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,float16,32767,0.04147200038035711
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,fp8,32767,0.012757333616415659
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,fp8,32767,0.03225066761175791
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,float16,65535,0.013023999830087027
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,float16,65535,0.06406933565934499
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,float16,1,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,float16,1,0.00985599992175897
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,fp8,65535,0.05412266651789347
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,fp8,65535,0.013210666676362356
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,fp8,1,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,fp8,1,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,float16,3,0.009045333291093508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,float16,3,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,fp8,3,0.010901333143313726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,fp8,3,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,float16,7,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,float16,7,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,fp8,7,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,fp8,7,0.010954666882753372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,float16,15,0.00897066667675972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,float16,15,0.010949333508809408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,fp8,15,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,fp8,15,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,float16,31,0.008890666688481966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,float16,131071,0.014922666052977243
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,float16,131071,0.11351999640464783
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,float16,63,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,float16,31,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,fp8,31,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,64,0,1,float16,fp8,131071,0.09436266620953877
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,fp8,31,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,64,128,1,float16,fp8,131071,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,float16,63,0.008943999807039896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,fp8,63,0.010064000263810158
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,fp8,63,0.01090666651725769
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,float16,127,0.01091733326514562
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,float16,127,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,fp8,127,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,float16,511,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,float16,511,0.011066666493813196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,float16,255,0.009328000247478485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,fp8,127,0.00943999985853831
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,fp8,255,0.010650667051474253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,float16,255,0.009253333633144697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,fp8,255,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,fp8,511,0.01055466632048289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,fp8,511,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,float16,1023,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,float16,1023,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,fp8,2047,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,fp8,1023,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,fp8,1023,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,float16,2047,0.013194666554530462
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,float16,2047,0.016048000504573185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,fp8,2047,0.012773333738247553
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,float16,4095,0.012853333105643591
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,float16,8191,0.02516266703605652
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,float16,4095,0.018810667097568512
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,fp8,4095,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,fp8,4095,0.017509333789348602
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,float16,8191,0.012826666235923767
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,float16,16383,0.012805332740147909
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,fp8,8191,0.013290667285521826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,fp8,8191,0.02309333284695943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,float16,16383,0.03956266740957896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,fp8,16383,0.012981332838535309
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,fp8,16383,0.03218133250872294
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,float16,32767,0.012821332861979803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,float16,32767,0.06402133405208588
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,fp8,32767,0.012752000242471695
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,fp8,32767,0.05403199791908264
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,float16,65535,0.012869333227475485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,float16,65535,0.11090667049090068
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,fp8,65535,0.01301866645614306
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,fp8,65535,0.09141332904497783
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,float16,1,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,float16,1,0.008976000050703684
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,fp8,1,0.009098666409651438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,float16,3,0.008858666444818178
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,fp8,1,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,float16,3,0.00897066667675972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,fp8,3,0.009029333169261614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,fp8,3,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,float16,7,0.009066666786869368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,float16,7,0.00884799969693025
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,fp8,7,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,fp8,7,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,float16,15,0.00903466654320558
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,float16,15,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,fp8,15,0.008943999807039896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,fp8,15,0.00901333304742972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,float16,31,0.01062400018175443
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,float16,131071,0.014815999815861383
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,float16,31,0.00895999992887179
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,fp8,31,0.00915733352303505
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,fp8,31,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,float16,131071,0.2053920030593872
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,float16,63,0.010458666831254959
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,float16,63,0.009359999870260557
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,64,128,1,float16,fp8,131071,0.01479999969402949
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,fp8,63,0.010911999891201654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,fp8,63,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,float16,127,0.009077333534757296
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,float16,127,0.009749333063761393
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,64,0,1,float16,fp8,131071,0.16870399316151938
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,fp8,127,0.008863999818762144
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,fp8,127,0.009514666472872099
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,float16,255,0.009839999799927076
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,float16,255,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,fp8,255,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,fp8,255,0.009039999917149544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,float16,511,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,float16,511,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,fp8,511,0.009674666449427605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,float16,2047,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,fp8,511,0.009018666421373686
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,float16,1023,0.009162666896979014
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,float16,1023,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,fp8,1023,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,float16,4095,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,fp8,1023,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,float16,2047,0.010656000425418219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,fp8,2047,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,float16,8191,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,fp8,2047,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,fp8,8191,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,float16,4095,0.00927466650803884
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,float16,16383,0.010634666929642359
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,fp8,4095,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,float16,16383,0.01899733394384384
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,fp8,4095,0.011098666737476984
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,float16,8191,0.008901333436369896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,fp8,8191,0.010079999764760336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,fp8,16383,0.01091733326514562
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,fp8,16383,0.01882133384545644
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,float16,32767,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,float16,32767,0.027119999130566914
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,fp8,32767,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,fp8,32767,0.026848000784715016
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,float16,65535,0.009130666653315226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,fp8,65535,0.010656000425418219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,float16,65535,0.029530666768550873
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,float16,1,0.009072000160813332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,float16,1,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,fp8,65535,0.029509333272775013
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,fp8,1,0.008933333059151968
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,fp8,1,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,float16,3,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,float16,3,0.009354666496316591
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,fp8,3,0.008954666554927826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,float16,131071,0.03372266640265783
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,fp8,3,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,float16,7,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,float16,7,0.00891733355820179
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,fp8,7,0.009018666421373686
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,float16,131071,0.011055999745925268
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,64,128,1,float16,fp8,131071,0.011152000476916632
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,fp8,7,0.009413333609700203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,fp8,15,0.008869333192706108
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,float16,15,0.009312000125646591
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,float16,15,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,64,0,1,float16,fp8,131071,0.03330666571855545
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,fp8,15,0.008965333302815756
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,float16,31,0.008752000207702318
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,float16,31,0.00898133342464765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,fp8,31,0.008858666444818178
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,fp8,31,0.009093333035707474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,float16,63,0.008949333180983862
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,float16,63,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,fp8,63,0.009045333291093508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,fp8,63,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,float16,127,0.009109333157539368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,float16,127,0.008976000050703684
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,fp8,127,0.009039999917149544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,fp8,127,0.00891733355820179
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,float16,255,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,float16,255,0.008912000184257826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,fp8,255,0.009039999917149544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,fp8,255,0.009722666814923286
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,float16,511,0.009050666665037474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,float16,511,0.009429333110650381
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,fp8,511,0.009648000200589498
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,fp8,511,0.011071999867757162
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,float16,1023,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,float16,1023,0.00915733352303505
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,fp8,2047,0.010890666395425797
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,fp8,2047,0.010981333752473196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,float16,4095,0.010911999891201654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,fp8,1023,0.010112000008424124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,fp8,1023,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,float16,2047,0.008853333070874214
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,float16,2047,0.010634666929642359
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,float16,4095,0.011061333119869232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,fp8,4095,0.010640000303586325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,fp8,4095,0.01146666705608368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,float16,8191,0.01062400018175443
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,float16,8191,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,fp8,8191,0.008997333546479544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,fp8,8191,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,float16,16383,0.01097600037852923
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,float16,16383,0.01930133377512296
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,float16,32767,0.023071999351183575
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,fp8,16383,0.010538666198650995
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,fp8,16383,0.019146667172511418
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,float16,32767,0.010938666760921478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,fp8,32767,0.009103999783595404
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,fp8,32767,0.021082667013009388
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,float16,65535,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,fp8,65535,0.01097600037852923
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,float16,65535,0.023770667612552643
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,float16,1,0.010608000059922537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,float16,1,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,fp8,65535,0.02330133318901062
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,fp8,1,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,fp8,1,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,fp8,3,0.010640000303586325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,float16,3,0.009002666920423508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,float16,3,0.009178666397929192
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,fp8,3,0.010170666500926018
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,fp8,131071,0.027066667874654133
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,float16,7,0.010656000425418219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,float16,131071,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,float16,7,0.008997333546479544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,fp8,7,0.009253333633144697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,64,128,1,float16,fp8,131071,0.012730666746695837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,64,0,1,float16,float16,131071,0.02787200113137563
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,fp8,7,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,float16,31,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,float16,15,0.009061333412925402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,float16,15,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,fp8,15,0.008826666822036108
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,fp8,15,0.009690666571259499
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,float16,31,0.009637333452701569
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,fp8,31,0.00878399983048439
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,fp8,31,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,float16,63,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,float16,63,0.008912000184257826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,fp8,63,0.011407999942700068
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,fp8,63,0.00955200009047985
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,fp8,255,0.010106666634480158
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,float16,127,0.009039999917149544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,float16,127,0.008976000050703684
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,fp8,127,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,fp8,127,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,float16,255,0.010656000425418219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,float16,255,0.009445333232482275
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,fp8,255,0.009285333255926767
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,float16,511,0.008863999818762144
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,float16,511,0.010602666685978571
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,fp8,1023,0.010944000134865442
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,fp8,511,0.00897066667675972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,fp8,511,0.010480000327030817
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,float16,1023,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,float16,1023,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,fp8,1023,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,float16,2047,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,fp8,4095,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,float16,2047,0.011173332730929056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,fp8,2047,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,fp8,2047,0.010970667004585266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,float16,4095,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,float16,4095,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,fp8,4095,0.01119999960064888
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,float16,8191,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,float16,16383,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,float16,8191,0.01584533353646596
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,fp8,8191,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,fp8,8191,0.014997333288192749
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,float16,16383,0.020741333564122517
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,float16,32767,0.011055999745925268
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,fp8,16383,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,fp8,16383,0.019167999426523846
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,float16,32767,0.023445333043734234
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,fp8,32767,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,fp8,32767,0.021173333128293354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,float16,65535,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,fp8,65535,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,float16,65535,0.024143998821576435
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,float16,1,0.009050666665037474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,fp8,65535,0.023077333966890972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,float16,1,0.008901333436369896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,fp8,1,0.009722666814923286
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,fp8,1,0.008757333581646284
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,float16,3,0.009141333401203156
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,float16,3,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,fp8,3,0.008949333180983862
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,fp8,3,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,float16,7,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,float16,131071,0.013434667140245438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,float16,7,0.008890666688481966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,fp8,7,0.010128000130256018
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,64,128,1,float16,fp8,131071,0.011413333316644033
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,fp8,7,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,float16,131071,0.028181334336598713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,float16,15,0.01101333275437355
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,float16,15,0.008933333059151968
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,float16,31,0.009695999945203463
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,fp8,31,0.012719999998807907
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,fp8,31,0.008863999818762144
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,fp8,15,0.010885333021481832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,fp8,15,0.009039999917149544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,64,0,1,float16,fp8,131071,0.027114666998386383
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,float16,31,0.010901333143313726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,float16,63,0.008832000195980072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,float16,63,0.011066666493813196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,fp8,63,0.009066666786869368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,float16,255,0.008954666554927826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,float16,255,0.009269333134094873
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,fp8,63,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,float16,127,0.008725333337982496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,float16,511,0.009381333366036415
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,float16,127,0.00973866693675518
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,fp8,127,0.009808000177145004
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,fp8,127,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,fp8,255,0.008986666798591614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,fp8,255,0.008943999807039896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,float16,511,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,fp8,511,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,fp8,511,0.011136000355084738
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,float16,1023,0.009109333157539368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,float16,1023,0.0106133334338665
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,fp8,1023,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,fp8,1023,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,float16,2047,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,float16,2047,0.011152000476916632
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,fp8,4095,0.012122667084137598
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,fp8,2047,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,fp8,2047,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,float16,4095,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,float16,4095,0.01129066695769628
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,fp8,4095,0.010885333021481832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,float16,8191,0.010656000425418219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,float16,8191,0.01703466723362605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,fp8,8191,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,float16,32767,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,fp8,8191,0.015423999478419622
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,float16,16383,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,float16,16383,0.01701333373785019
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,fp8,16383,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,fp8,16383,0.01833600054184596
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,float16,32767,0.019023999571800232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,fp8,32767,0.010901333143313726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,fp8,32767,0.019146667172511418
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,float16,65535,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,float16,65535,0.022682666778564453
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,fp8,65535,0.021157334248224895
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,fp8,65535,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,float16,1,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,float16,1,0.009663999701539675
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,fp8,1,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,fp8,1,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,float16,3,0.01062400018175443
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,float16,3,0.009701333319147428
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,fp8,3,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,fp8,3,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,float16,7,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,float16,131071,0.012847999731699625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,float16,7,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,fp8,7,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,fp8,7,0.010954666882753372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,64,128,1,float16,fp8,131071,0.012869333227475485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,float16,131071,0.02864533414443334
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,float16,15,0.010026666646202406
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,float16,15,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,fp8,15,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,64,0,1,float16,fp8,131071,0.027136000494162243
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,fp8,15,0.01028266673286756
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,float16,31,0.009103999783595404
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,float16,31,0.009125333279371262
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,fp8,31,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,fp8,31,0.010981333752473196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,float16,63,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,float16,63,0.009002666920423508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,fp8,63,0.009946666657924652
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,fp8,127,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,fp8,63,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,float16,127,0.009002666920423508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,float16,127,0.0100426667680343
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,fp8,127,0.01101333275437355
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,float16,255,0.00938666673998038
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,float16,511,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,float16,255,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,fp8,255,0.01089599976936976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,float16,1023,0.010981333752473196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,fp8,255,0.009866666669646898
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,float16,511,0.009637333452701569
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,fp8,511,0.010901333143313726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,fp8,511,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,float16,1023,0.011141333729028702
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,fp8,1023,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,fp8,1023,0.012863999853531519
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,float16,2047,0.013232000172138214
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,float16,2047,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,fp8,2047,0.012831999609867731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,fp8,2047,0.015477333217859268
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,float16,4095,0.012928000340859095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,float16,4095,0.019391999890406925
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,fp8,4095,0.012885333349307379
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,fp8,4095,0.018874666343132656
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,float16,8191,0.013002666334311167
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,float16,8191,0.024501333634058636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,fp8,8191,0.01309866706530253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,fp8,8191,0.023141334454218548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,float16,16383,0.01268799975514412
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,float16,16383,0.04145599901676178
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,fp8,16383,0.012858666479587555
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,fp8,16383,0.031583999594052635
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,float16,32767,0.012805332740147909
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,float16,32767,0.06406933565934499
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,fp8,32767,0.012815999488035837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,float16,1,0.010890666395425797
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,fp8,32767,0.05506666501363119
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,float16,1,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,fp8,1,0.01108266661564509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,fp8,1,0.011055999745925268
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,float16,3,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,float16,3,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,fp8,3,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,fp8,3,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,float16,7,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,float16,7,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,fp8,7,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,fp8,7,0.010933333386977514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,float16,15,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,float16,15,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,fp8,15,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,float16,65535,0.111135999361674
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,64,0,1,float16,fp8,65535,0.0916373332341512
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,fp8,15,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,float16,65535,0.012810666114091873
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,64,128,1,float16,fp8,65535,0.012757333616415659
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,float16,31,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,float16,31,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,fp8,31,0.011445333560307821
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,fp8,31,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,float16,63,0.01062400018175443
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,fp8,127,0.010885333021481832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,float16,63,0.010618666807810465
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,fp8,63,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,fp8,63,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,float16,127,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,float16,127,0.010437333335479101
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,fp8,127,0.010885333021481832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,float16,511,0.011445333560307821
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,float16,255,0.010911999891201654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,float16,255,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,float16,1023,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,fp8,255,0.01091733326514562
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,fp8,255,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,float16,511,0.0106133334338665
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,float16,2047,0.012991999586423239
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,float16,2047,0.01724799970785777
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,fp8,511,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,fp8,511,0.011077333241701126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,float16,1023,0.012773333738247553
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,fp8,1023,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,fp8,1023,0.013178666432698568
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,fp8,2047,0.013034666577974955
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,fp8,2047,0.017045332739750545
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,float16,4095,0.013157332936922709
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,float16,4095,0.023317334552605946
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,fp8,4095,0.013082666943470636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,fp8,4095,0.021344001094500225
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,float16,8191,0.012730666746695837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,float16,8191,0.03966933240493139
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,fp8,8191,0.012944000462690989
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,float16,16383,0.012906666845083237
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,fp8,8191,0.03162133445342382
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,float16,16383,0.062181333700815834
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,fp8,16383,0.013162666310866674
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,fp8,16383,0.05381333331267039
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,float16,32767,0.012879999975363413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,float16,32767,0.10550933082898457
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,fp8,32767,0.012896000097195307
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,fp8,32767,0.09308800101280212
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,float16,1,0.00897066667675972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,float16,1,0.008767999708652496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,fp8,1,0.009082666908701261
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,float16,3,0.009029333169261614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,fp8,1,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,float16,3,0.00898133342464765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,fp8,3,0.01007466639081637
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,fp8,3,0.010351999973257383
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,float16,7,0.009077333534757296
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,float16,7,0.009808000177145004
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,fp8,7,0.010565333068370819
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,float16,65535,0.01320533330241839
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,fp8,7,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,float16,65535,0.19672532876332602
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,float16,15,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,64,128,1,float16,fp8,65535,0.013002666334311167
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,float16,15,0.009093333035707474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,fp8,15,0.009349333122372627
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,64,0,1,float16,fp8,65535,0.1685333251953125
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,fp8,31,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,fp8,15,0.00997866690158844
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,float16,31,0.009813333551088968
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,float16,31,0.01044800008336703
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,float16,127,0.00966933307548364
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,fp8,31,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,float16,63,0.008949333180983862
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,float16,63,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,fp8,63,0.009039999917149544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,fp8,63,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,fp8,255,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,float16,127,0.009829333052039146
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,fp8,127,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,fp8,127,0.009770666559537252
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,float16,255,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,float16,255,0.009093333035707474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,fp8,255,0.009029333169261614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,float16,511,0.00890666681031386
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,float16,511,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,fp8,1023,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,fp8,511,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,fp8,511,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,float16,1023,0.009525333220760027
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,float16,1023,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,fp8,1023,0.01198400060335795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,fp8,4095,0.011749333391586939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,float16,4095,0.012794667234023413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,fp8,4095,0.013194666554530462
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,float16,2047,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,float16,8191,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,float16,8191,0.016837333639462788
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,float16,2047,0.013034666577974955
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,fp8,2047,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,fp8,2047,0.012949333836634954
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,float16,4095,0.010954666882753372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,fp8,16383,0.012831999609867731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,fp8,8191,0.012389333297808966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,fp8,8191,0.017237332959969837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,float16,16383,0.011477333803971609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,float16,16383,0.01916266605257988
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,fp8,16383,0.01897066707412402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,fp8,32767,0.021040000021457672
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,float16,32767,0.01118933285276095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,float16,32767,0.021013334393501282
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,fp8,32767,0.012949333836634954
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,float16,65535,0.010944000134865442
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,float16,65535,0.02346666653951009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,fp8,65535,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,float16,1,0.008938666433095932
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,fp8,65535,0.023152001202106476
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,fp8,1,0.009039999917149544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,float16,1,0.010650667051474253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,fp8,1,0.008943999807039896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,float16,3,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,float16,3,0.00914666677514712
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,fp8,3,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,float16,7,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,float16,7,0.008997333546479544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,fp8,3,0.008896000062425932
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,float16,15,0.009002666920423508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,fp8,131071,0.013050666699806849
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,64,128,1,float16,float16,131071,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,fp8,7,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,fp8,7,0.009093333035707474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,float16,131071,0.029685333371162415
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,float16,15,0.00895999992887179
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,fp8,15,0.009029333169261614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,64,0,1,float16,fp8,131071,0.029535998900731403
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,fp8,15,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,float16,31,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,float16,31,0.009125333279371262
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,fp8,31,0.009072000160813332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,fp8,31,0.008992000172535578
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,float16,63,0.010538666198650995
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,float16,63,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,fp8,63,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,fp8,63,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,float16,127,0.00903466654320558
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,float16,127,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,fp8,127,0.008938666433095932
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,fp8,127,0.010608000059922537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,float16,255,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,float16,255,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,fp8,255,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,fp8,255,0.009018666421373686
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,float16,511,0.009455999980370203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,float16,511,0.009269333134094873
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,fp8,511,0.01062400018175443
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,fp8,511,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,float16,1023,0.010645333677530289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,float16,1023,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,fp8,1023,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,fp8,1023,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,float16,2047,0.011066666493813196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,float16,2047,0.012730666746695837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,fp8,4095,0.015200000256299973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,fp8,2047,0.012058666596810022
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,fp8,2047,0.013050666699806849
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,float16,4095,0.011407999942700068
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,float16,4095,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,fp8,4095,0.011125333607196808
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,float16,8191,0.011839999506870905
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,float16,8191,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,fp8,8191,0.012831999609867731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,fp8,8191,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,float16,16383,0.012773333738247553
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,float16,16383,0.019173332800467808
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,fp8,16383,0.011717333147923151
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,fp8,16383,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,fp8,32767,0.02091199904680252
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,float16,32767,0.012602667013804117
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,float16,32767,0.020981334149837494
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,float16,65535,0.01173866664369901
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,fp8,32767,0.011125333607196808
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,float16,65535,0.02712533374627431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,fp8,65535,0.010970667004585266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,float16,1,0.010431999961535135
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,fp8,65535,0.02515200028816859
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,float16,1,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,fp8,1,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,fp8,1,0.010922666639089584
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,float16,3,0.010928000013033548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,float16,3,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,float16,131071,0.013237333546082178
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,float16,131071,0.045882667104403176
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,fp8,3,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,fp8,3,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,float16,7,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,float16,7,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,64,0,1,float16,fp8,131071,0.03535466641187668
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,64,128,1,float16,fp8,131071,0.013093333691358566
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,fp8,7,0.010634666929642359
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,fp8,7,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,float16,15,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,float16,15,0.01055466632048289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,fp8,15,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,fp8,15,0.011055999745925268
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,float16,31,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,float16,31,0.010405333091815313
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,fp8,31,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,fp8,31,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,float16,63,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,float16,63,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,fp8,63,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,fp8,63,0.011061333119869232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,float16,127,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,float16,127,0.010618666807810465
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,fp8,127,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,fp8,127,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,float16,255,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,float16,255,0.009904000287254652
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,fp8,255,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,fp8,255,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,float16,511,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,float16,1023,0.012901333471139273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,float16,511,0.011087999989589056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,fp8,511,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,fp8,511,0.01097600037852923
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,float16,1023,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,fp8,1023,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,fp8,1023,0.014682666709025701
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,float16,2047,0.012863999853531519
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,float16,2047,0.01904533306757609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,fp8,2047,0.013434667140245438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,fp8,2047,0.017263999829689663
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,float16,4095,0.013013333082199097
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,float16,4095,0.023082666099071503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,fp8,4095,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,fp8,4095,0.023029332359631855
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,float16,8191,0.03958933303753535
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,float16,8191,0.013162666310866674
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,fp8,8191,0.0136266661187013
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,fp8,8191,0.031386665999889374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,float16,16383,0.012917333592971167
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,float16,16383,0.06186666587988535
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,fp8,16383,0.013007999708255133
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,fp8,16383,0.055258666475613914
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,float16,1,0.011141333729028702
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,float16,1,0.011173332730929056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,fp8,1,0.011098666737476984
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,fp8,1,0.011173332730929056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,float16,3,0.011488000551859537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,float16,3,0.011391999820868174
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,fp8,3,0.011061333119869232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,fp8,3,0.012389333297808966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,float16,7,0.011247999966144562
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,float16,7,0.011525332927703857
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,fp8,32767,0.013781332721312841
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,fp8,7,0.012661332885424295
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,fp8,7,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,float16,15,0.011866666376590729
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,64,128,1,float16,float16,32767,0.013253333667914072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,float16,32767,0.107424000898997
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,64,0,1,float16,fp8,32767,0.09270933270454407
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,float16,15,0.011071999867757162
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,fp8,15,0.010954666882753372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,fp8,15,0.011850666254758835
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,fp8,31,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,float16,31,0.01157333329319954
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,float16,31,0.011551999797423681
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,fp8,31,0.012453333785136541
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,float16,63,0.01101333275437355
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,float16,127,0.011605333536863327
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,fp8,63,0.01098666712641716
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,fp8,127,0.011087999989589056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,float16,63,0.011317333827416102
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,fp8,63,0.011098666737476984
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,float16,127,0.011328000575304031
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,fp8,127,0.011120000233252844
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,float16,255,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,float16,255,0.01089599976936976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,fp8,255,0.012063999970753988
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,fp8,255,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,float16,1023,0.01089599976936976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,float16,511,0.0116799995303154
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,fp8,1023,0.0124746672809124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,float16,511,0.012954667210578918
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,fp8,511,0.012597333639860153
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,fp8,511,0.012842666357755661
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,fp8,2047,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,float16,1023,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,fp8,1023,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,float16,2047,0.014858666807413101
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,float16,2047,0.025018667181332905
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,fp8,2047,0.023381332556406658
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,float16,4095,0.014815999815861383
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,float16,4095,0.039434666434923805
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,fp8,4095,0.015018666783968607
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,fp8,4095,0.03338133295377096
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,float16,8191,0.014794666320085526
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,float16,8191,0.06286933521429698
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,fp8,8191,0.015103999525308609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,fp8,8191,0.055685331424077354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,float16,16383,0.014981333166360855
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,float16,16383,0.10700266559918721
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,fp8,16383,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,fp8,16383,0.09322133660316467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,float16,1,0.011130666981140772
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,float16,1,0.01101333275437355
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,fp8,1,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,fp8,1,0.011071999867757162
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,float16,3,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,float16,3,0.011098666737476984
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,fp8,3,0.010981333752473196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,fp8,3,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,float16,7,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,float16,7,0.01173866664369901
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,fp8,7,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,fp8,7,0.011071999867757162
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,float16,32767,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,float16,15,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,float16,15,0.011813333878914515
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,fp8,15,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,64,128,1,float16,fp8,32767,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,fp8,15,0.010981333752473196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,float16,32767,0.19715199867884317
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,64,0,1,float16,fp8,32767,0.16980799039204916
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,float16,31,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,float16,31,0.011701333026091257
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,fp8,31,0.012629333883523941
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,fp8,31,0.011989332735538483
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,fp8,63,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,float16,63,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,float16,63,0.011482667177915573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,fp8,63,0.011098666737476984
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,float16,127,0.011701333026091257
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,float16,127,0.011770666887362799
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,fp8,127,0.012837332983811697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,fp8,127,0.011130666981140772
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,fp8,255,0.012762666990359625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,float16,255,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,float16,255,0.011168000598748526
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,fp8,255,0.011087999989589056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,float16,511,0.012063999970753988
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,float16,511,0.012986666212479273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,fp8,511,0.012741333494583765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,fp8,511,0.012863999853531519
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,float16,1023,0.011061333119869232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,float16,1023,0.016735999534527462
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,fp8,1023,0.011125333607196808
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,fp8,1023,0.01600533351302147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,float16,2047,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,float16,2047,0.0252960001428922
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,fp8,2047,0.015141333142916361
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,fp8,2047,0.02312533309062322
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,float16,4095,0.014677333335081736
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,float16,4095,0.04065600037574768
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,fp8,4095,0.015130666395028433
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,fp8,4095,0.033189333975315094
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,float16,1,0.012997332960367203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,float16,1,0.01482133318980535
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,float16,8191,0.014954666296641031
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,float16,8191,0.06317866841952006
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,fp8,1,0.012991999586423239
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,fp8,1,0.013141332815090815
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,64,128,1,float16,fp8,8191,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,64,0,1,float16,fp8,8191,0.055733333031336464
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,float16,3,0.013130666067202887
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,float16,3,0.014560000350077948
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,fp8,3,0.014266667266686758
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,float16,7,0.014266667266686758
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,fp8,3,0.014010666559139887
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,float16,7,0.01303999995191892
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,fp8,7,0.013061333447694778
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,fp8,7,0.013770667215188345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,float16,15,0.01301866645614306
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,float16,15,0.01381333296497663
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,fp8,31,0.01303999995191892
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,fp8,31,0.01368533323208491
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,fp8,15,0.014277332772811254
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,fp8,15,0.01393066719174385
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,float16,31,0.01312000056107839
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,float16,31,0.013125333935022354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,float16,63,0.01314666618903478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,float16,63,0.014261333892742792
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,fp8,63,0.01312000056107839
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,fp8,63,0.013077333569526672
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,float16,127,0.015002666662136713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,float16,255,0.014592000593741735
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,float16,127,0.013114667187134424
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,fp8,127,0.012965332716703415
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,fp8,127,0.012991999586423239
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,float16,255,0.01358933374285698
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,fp8,255,0.013061333447694778
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,fp8,511,0.017125333348910015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,fp8,255,0.013050666699806849
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,float16,511,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,float16,511,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,fp8,511,0.013242666920026144
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,float16,1023,0.014479999740918478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,float16,1023,0.023002666731675465
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,fp8,1023,0.012991999586423239
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,fp8,1023,0.02111999938885371
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,float16,2047,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,float16,2047,0.04156800111134847
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,fp8,2047,0.01692266638080279
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,fp8,2047,0.033333333830038704
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,float16,4095,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,fp8,4095,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,float16,4095,0.06404266754786174
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,fp8,4095,0.056746666630109154
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,float16,1,0.013834666460752487
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,float16,8191,0.016821333517630894
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,float16,8191,0.10830932855606079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,float16,1,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,fp8,1,0.014560000350077948
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,64,128,1,float16,fp8,8191,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,64,0,1,float16,fp8,8191,0.0946560005346934
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,fp8,1,0.014938666174809137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,float16,3,0.013077333569526672
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,float16,3,0.013189333180586496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,fp8,3,0.014064000298579534
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,fp8,3,0.014789332946141561
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,float16,7,0.014815999815861383
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,float16,7,0.013141332815090815
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,fp8,7,0.013199999928474426
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,fp8,7,0.014773332824309668
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,float16,15,0.0129120002190272
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,float16,31,0.014901333798964819
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,float16,15,0.012991999586423239
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,fp8,15,0.014442666123310724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,fp8,15,0.014789332946141561
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,float16,31,0.013045333325862885
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,fp8,31,0.013167999684810638
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,fp8,31,0.014842666685581207
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,float16,63,0.012960000584522883
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,float16,63,0.014597332725922266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,fp8,63,0.012869333227475485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,fp8,63,0.012938667088747025
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,float16,127,0.013189333180586496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,float16,127,0.014170666535695394
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,float16,255,0.014741333822409311
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,fp8,127,0.013088000317414602
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,fp8,127,0.012944000462690989
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,float16,255,0.013162666310866674
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,fp8,255,0.013487999637921652
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,fp8,255,0.013082666943470636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,float16,511,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,float16,511,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,fp8,511,0.01332266628742218
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,float16,1023,0.014757333944241205
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,fp8,511,0.016757333030303318
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,float16,1023,0.023077333966890972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,fp8,1023,0.013397333522637686
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,fp8,1023,0.021125334004561108
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,float16,2047,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,float16,2047,0.0409706657131513
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,fp8,2047,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,fp8,2047,0.033546666304270424
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,float16,1,0.01725333308180173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,float16,1,0.017525333911180496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,float16,4095,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,fp8,1,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,64,128,1,float16,fp8,4095,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,float16,4095,0.06398933132489522
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,fp8,1,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,64,0,1,float16,fp8,4095,0.0558186670144399
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,float16,3,0.017653333644072216
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,float16,3,0.017279999951521557
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,fp8,3,0.017136000096797943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,fp8,3,0.01727466657757759
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,float16,7,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,float16,7,0.018186666071414948
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,fp8,7,0.01701333373785019
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,fp8,7,0.016938666502634685
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,float16,15,0.017162666966517765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,float16,15,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,fp8,15,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,fp8,15,0.01718933383623759
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,float16,31,0.017242666333913803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,float16,31,0.01854933301607768
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,fp8,31,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,fp8,31,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,float16,63,0.01729600007335345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,float16,63,0.01728533332546552
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,fp8,63,0.017082666357358296
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,fp8,63,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,float16,127,0.017269333203633625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,float16,127,0.01814933369557063
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,float16,255,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,fp8,127,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,fp8,127,0.017093333105246227
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,float16,255,0.017279999951521557
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,fp8,255,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,fp8,255,0.017130666722853977
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,fp8,511,0.023024000227451324
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,float16,511,0.0189280000825723
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,float16,1023,0.017125333348910015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,float16,511,0.023386667172114056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,fp8,511,0.01828266680240631
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,float16,1023,0.035455999275048576
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,fp8,1023,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,fp8,1023,0.029440000653266907
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,float16,2047,0.021301334102948506
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,float16,2047,0.0602453351020813
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,fp8,2047,0.021221332252025604
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,fp8,2047,0.054010664423306785
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,float16,1,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,float16,1,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,fp8,1,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,fp8,1,0.010026666646202406
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,float16,4095,0.02109333376089732
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,float16,3,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,float16,3,0.010949333508809408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,float16,4095,0.09915199875831604
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,fp8,3,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,64,0,1,float16,fp8,4095,0.08659199873606364
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,64,128,1,float16,fp8,4095,0.02033599962790807
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,fp8,3,0.00985599992175897
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,float16,7,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,float16,7,0.009039999917149544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,fp8,7,0.009872000043590864
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,fp8,7,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,float16,15,0.009093333035707474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,float16,15,0.008837333569924036
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,fp8,15,0.010304000228643417
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,fp8,15,0.008912000184257826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,float16,31,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,float16,31,0.009162666896979014
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,fp8,31,0.009461333354314169
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,fp8,31,0.010629333555698395
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,float16,127,0.009072000160813332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,float16,63,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,float16,63,0.009077333534757296
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,fp8,63,0.010010666524370512
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,fp8,63,0.00884799969693025
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,float16,127,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,fp8,127,0.009989333028594652
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,fp8,127,0.009632000078757605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,float16,255,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,float16,255,0.009061333412925402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,fp8,255,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,fp8,255,0.00901333304742972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,float16,1023,0.00903466654320558
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,float16,511,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,float16,511,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,fp8,511,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,fp8,511,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,float16,1023,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,fp8,2047,0.012869333227475485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,fp8,1023,0.008965333302815756
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,fp8,1023,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,float16,2047,0.013050666699806849
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,fp8,4095,0.011749333391586939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,float16,2047,0.012821332861979803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,fp8,2047,0.012826666235923767
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,float16,8191,0.012784000486135483
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,float16,4095,0.012629333883523941
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,float16,4095,0.015226667126019796
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,fp8,4095,0.01505600040157636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,float16,8191,0.016832000265518825
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,fp8,8191,0.012853333105643591
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,fp8,8191,0.017008000363906223
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,float16,16383,0.012453333785136541
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,float16,16383,0.01923199991385142
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,fp8,16383,0.012778667112191519
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,fp8,16383,0.01894933357834816
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,float16,32767,0.010954666882753372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,fp8,32767,0.012789333860079447
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,float16,32767,0.02125866711139679
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,fp8,32767,0.021061333517233532
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,float16,65535,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,float16,65535,0.027461332579453785
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,fp8,65535,0.025098666548728943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,float16,1,0.009125333279371262
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,fp8,65535,0.0129120002190272
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,fp8,1,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,float16,1,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,fp8,1,0.009375999992092451
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,float16,3,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,float16,3,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,fp8,3,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,float16,7,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,fp8,3,0.009359999870260557
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,float16,7,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,fp8,7,0.011136000355084738
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,fp8,131071,0.014981333166360855
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,64,128,1,float16,float16,131071,0.01404800017674764
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,fp8,7,0.00903466654320558
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,float16,15,0.00895999992887179
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,float16,131071,0.044218664367993675
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,float16,15,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,64,0,1,float16,fp8,131071,0.03545066714286804
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,fp8,15,0.00960533320903778
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,fp8,15,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,float16,31,0.008949333180983862
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,float16,31,0.010064000263810158
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,fp8,31,0.009583999713261923
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,float16,63,0.009685333197315535
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,fp8,31,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,float16,63,0.009077333534757296
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,fp8,63,0.010159999753038088
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,float16,255,0.009018666421373686
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,fp8,63,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,float16,127,0.009045333291093508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,float16,127,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,fp8,127,0.008901333436369896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,float16,511,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,fp8,511,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,fp8,127,0.008816000074148178
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,float16,255,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,fp8,255,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,fp8,255,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,float16,511,0.010992000500361124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,fp8,511,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,float16,1023,0.00891733355820179
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,float16,1023,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,fp8,2047,0.014794666320085526
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,fp8,1023,0.009493333597977957
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,fp8,1023,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,float16,2047,0.011296000331640244
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,float16,2047,0.013936000565687815
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,fp8,2047,0.012890666723251343
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,float16,4095,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,float16,4095,0.016234666109085083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,fp8,4095,0.012831999609867731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,fp8,4095,0.01516266663869222
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,float16,8191,0.012762666990359625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,float16,8191,0.01718933383623759
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,fp8,8191,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,fp8,8191,0.017162666966517765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,float16,16383,0.010981333752473196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,float16,16383,0.020949333906173706
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,fp8,16383,0.012746666868527731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,fp8,16383,0.019071999937295914
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,float16,32767,0.025402667621771496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,float16,32767,0.012842666357755661
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,fp8,32767,0.012613333761692047
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,fp8,32767,0.023024000227451324
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,float16,65535,0.012661332885424295
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,fp8,65535,0.012970666090647379
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,fp8,65535,0.03172266731659571
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,float16,65535,0.04139200101296107
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,64,128,1,float16,float16,1,0.019098666807015736
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,64,0,1,float16,float16,1,0.01756799966096878
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,64,128,1,float16,fp8,1,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,float16,131071,0.012928000340859095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,64,128,1,float16,fp8,131071,0.014080000420411428
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,64,0,1,float16,fp8,1,0.01730666682124138
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,float16,131071,0.06273599962393443
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,64,128,1,float16,float16,3,0.019146667172511418
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,64,0,1,float16,float16,3,0.018778666853904724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,64,128,1,float16,fp8,3,0.01722666621208191
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,64,0,1,float16,fp8,131071,0.055733333031336464
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,64,0,1,float16,fp8,3,0.017077332983414333
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,64,128,1,float16,float16,7,0.017674667139848072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,64,128,1,float16,fp8,7,0.01724799970785777
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,64,0,1,float16,fp8,7,0.01714666684468587
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,64,0,1,float16,float16,7,0.01703466723362605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,64,128,1,float16,float16,15,0.017258666455745697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,64,128,1,float16,fp8,15,0.017194667210181553
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,64,0,1,float16,float16,15,0.017194667210181553
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,64,0,1,float16,fp8,15,0.017082666357358296
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,64,128,1,float16,float16,31,0.018917333334684372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,64,0,1,float16,float16,31,0.01876266673207283
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,64,128,1,float16,fp8,31,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,64,0,1,float16,fp8,31,0.016943999876578648
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,64,128,1,float16,float16,63,0.018842666099468868
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,64,0,1,float16,float16,63,0.017055999487638474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,64,128,1,float16,fp8,63,0.017103999853134155
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,64,0,1,float16,fp8,63,0.018021332720915478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,64,128,1,float16,float16,127,0.01728533332546552
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,64,0,1,float16,float16,127,0.018992000569899876
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,64,128,1,float16,fp8,127,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,64,0,1,float16,fp8,127,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,64,128,1,float16,float16,255,0.017136000096797943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,64,0,1,float16,float16,255,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,64,128,1,float16,fp8,255,0.01883200059334437
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,64,0,1,float16,fp8,255,0.017114666601022083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,64,0,1,float16,float16,511,0.023141334454218548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,64,128,1,float16,float16,511,0.017903999735911686
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,64,128,1,float16,fp8,511,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,64,0,1,float16,fp8,511,0.022970666488011677
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,64,128,1,float16,float16,1023,0.018874666343132656
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,64,128,1,float16,fp8,1023,0.018901333212852478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,64,0,1,float16,float16,1023,0.03583999971548716
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,64,0,1,float16,fp8,1023,0.031173333525657654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,64,128,1,float16,float16,1,0.027066667874654133
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,64,128,1,float16,float16,2047,0.021370666722456615
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,64,0,1,float16,float16,2047,0.0603359987338384
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,64,0,1,float16,float16,1,0.027077332139015198
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,64,128,1,float16,fp8,2047,0.020960000654061634
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,64,128,1,float16,fp8,1,0.025290665527184803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,64,0,1,float16,fp8,2047,0.05379199981689453
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,64,0,1,float16,fp8,1,0.025087999800841015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,64,128,1,float16,float16,3,0.027114666998386383
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,64,0,1,float16,float16,3,0.027210667729377747
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,64,128,1,float16,fp8,3,0.025013332565625507
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,64,0,1,float16,fp8,3,0.025370667378107708
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,64,128,1,float16,float16,7,0.026021334032217663
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,64,0,1,float16,float16,7,0.026767998933792114
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,64,128,1,float16,fp8,7,0.025263999899228413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,64,0,1,float16,fp8,7,0.025199999411900837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,64,128,1,float16,float16,15,0.027082666754722595
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,64,0,1,float16,float16,15,0.025888000925381977
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,64,128,1,float16,fp8,15,0.02534399926662445
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,64,0,1,float16,fp8,15,0.025045332809289295
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,64,128,1,float16,float16,31,0.026421333352724712
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,64,0,1,float16,float16,31,0.027242665489514668
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,64,128,1,float16,fp8,31,0.02498133232196172
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,64,0,1,float16,fp8,31,0.025301332275072735
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,64,128,1,float16,float16,63,0.025583999852339428
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,64,0,1,float16,float16,63,0.026122666895389557
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,64,128,1,float16,fp8,63,0.02513599892457326
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,64,0,1,float16,fp8,63,0.02516799916823705
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,64,128,1,float16,float16,127,0.027285332481066387
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,64,0,1,float16,float16,127,0.027210667729377747
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,64,128,1,float16,fp8,127,0.02514133354028066
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,64,0,1,float16,fp8,127,0.025050667424996693
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,64,128,1,float16,float16,255,0.0269813338915507
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,64,0,1,float16,float16,255,0.026533332963784535
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,64,128,1,float16,fp8,255,0.02535466601451238
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,64,0,1,float16,fp8,255,0.025194667279720306
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,64,128,1,float16,float16,511,0.027215999861558277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,64,0,1,float16,float16,511,0.040063999593257904
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,64,128,1,float16,fp8,511,0.025413334369659424
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,64,0,1,float16,fp8,511,0.034128000338872276
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,64,128,1,float16,float16,1023,0.02701866626739502
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,64,0,1,float16,float16,1023,0.059952000776926674
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,64,128,1,float16,fp8,1023,0.025306666890780132
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,64,0,1,float16,fp8,1023,0.05395199855168661
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,64,0,1,float16,float16,2047,0.10322133700052898
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,64,128,1,float16,float16,2047,0.030159999926884968
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,64,128,1,float16,fp8,2047,0.02914133419593175
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,64,0,1,float16,fp8,2047,0.09058666229248047
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,64,128,1,float16,float16,1,0.027072000006834667
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,64,0,1,float16,float16,1,0.027600000301996868
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,64,128,1,float16,fp8,1,0.025519999365011852
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,64,0,1,float16,fp8,1,0.025381334125995636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,64,128,1,float16,float16,3,0.02717333287000656
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,64,0,1,float16,float16,3,0.0271519993742307
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,64,128,1,float16,fp8,3,0.02513599892457326
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,64,0,1,float16,fp8,3,0.02516266703605652
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,64,128,1,float16,float16,7,0.02739199995994568
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,64,0,1,float16,float16,7,0.02722666660944621
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,64,128,1,float16,fp8,7,0.025274666647116344
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,64,0,1,float16,fp8,7,0.025418666501839954
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,64,128,1,float16,float16,15,0.027136000494162243
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,64,0,1,float16,float16,15,0.027024000883102417
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,64,128,1,float16,fp8,15,0.025248001019159954
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,64,0,1,float16,fp8,15,0.025493333737055462
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,64,128,1,float16,float16,31,0.027456000447273254
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,64,0,1,float16,float16,31,0.027258666853109997
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,64,128,1,float16,fp8,31,0.025477332373460133
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,64,0,1,float16,fp8,31,0.02515733242034912
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,64,128,1,float16,float16,63,0.02717866748571396
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,64,0,1,float16,float16,63,0.027066667874654133
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,64,128,1,float16,fp8,63,0.025642665723959606
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,64,0,1,float16,fp8,63,0.025397333006064098
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,64,128,1,float16,float16,127,0.02739733209212621
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,64,0,1,float16,float16,127,0.027274665733178455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,64,128,1,float16,fp8,127,0.025445332129796345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,64,0,1,float16,fp8,127,0.025237334271272022
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,64,128,1,float16,float16,255,0.02701333413521449
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,64,0,1,float16,float16,255,0.027104000250498455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,64,128,1,float16,fp8,255,0.02718399961789449
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,64,0,1,float16,fp8,255,0.02535466601451238
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,64,128,1,float16,float16,511,0.0271519993742307
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,64,0,1,float16,float16,511,0.040762667854626976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,64,128,1,float16,fp8,511,0.025125332176685333
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,64,0,1,float16,fp8,511,0.03566933423280716
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,64,128,1,float16,float16,1023,0.02734400083621343
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,64,0,1,float16,float16,1023,0.06002666552861532
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,64,128,1,float16,fp8,1023,0.027189334233601887
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,64,128,1,float16,float16,1,0.04457599918047587
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,64,0,1,float16,fp8,1023,0.054117331902186074
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,64,0,1,float16,float16,1,0.04362666606903076
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,64,128,1,float16,fp8,1,0.04041066765785217
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,64,0,1,float16,fp8,1,0.04182933270931244
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,64,128,1,float16,float16,3,0.043578664461771645
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,64,0,1,float16,float16,3,0.043493335445721946
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,64,128,1,float16,fp8,3,0.04159999887148539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,64,128,1,float16,float16,7,0.043552001317342125
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,64,128,1,float16,fp8,7,0.04048533240954081
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,64,0,1,float16,float16,7,0.04378666480382284
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,64,0,1,float16,fp8,7,0.0414986660083135
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,64,128,1,float16,float16,15,0.043840001026789345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,64,0,1,float16,float16,15,0.04347200194994608
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,64,128,1,float16,fp8,15,0.041221333046754204
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,64,0,1,float16,fp8,15,0.04051200052102407
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,64,128,1,float16,float16,31,0.043765331308046974
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,64,0,1,float16,float16,31,0.043824002146720886
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,64,128,1,float16,fp8,31,0.040565334260463715
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,64,0,1,float16,fp8,31,0.041375999649365745
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,64,128,1,float16,float16,63,0.04387199878692627
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,64,0,1,float16,float16,63,0.043509334325790405
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,64,128,1,float16,fp8,63,0.04106666644414266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,64,0,1,float16,fp8,63,0.040522667268911995
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,64,128,1,float16,float16,127,0.04380266865094503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,64,0,1,float16,float16,127,0.043552001317342125
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,64,128,1,float16,fp8,127,0.041450666884581246
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,64,0,1,float16,fp8,127,0.04147200038035711
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,64,128,1,float16,float16,255,0.0452106644709905
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,64,0,1,float16,float16,255,0.04557333389918009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,64,128,1,float16,fp8,255,0.041738669077555336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,64,0,1,float16,fp8,255,0.03979199876387914
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,64,128,1,float16,float16,511,0.0443146675825119
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,64,0,1,float16,fp8,3,0.04159466673930486
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,64,0,1,float16,float16,511,0.06808533271153767
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,64,128,1,float16,fp8,511,0.04181866844495138
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,float16,1,0.010922666639089584
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,64,0,1,float16,fp8,511,0.06196266909440359
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,float16,1,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,fp8,1,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,fp8,1,0.00901333304742972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,64,128,1,float16,float16,1023,0.04513066510359446
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,float16,3,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,float16,3,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,64,0,1,float16,float16,1023,0.10706667105356853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,fp8,3,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,64,128,1,float16,fp8,1023,0.04181866844495138
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,fp8,3,0.00916800027092298
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,float16,15,0.008933333059151968
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,float16,7,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,float16,7,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,64,0,1,float16,fp8,1023,0.0949173370997111
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,fp8,7,0.009008000294367472
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,fp8,7,0.01062400018175443
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,float16,15,0.009114666531483332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,fp8,15,0.0100426667680343
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,fp8,15,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,float16,31,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,fp8,63,0.010634666929642359
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,float16,31,0.008698666468262672
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,fp8,31,0.009296000003814697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,fp8,31,0.009695999945203463
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,float16,63,0.01062400018175443
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,float16,63,0.009093333035707474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,fp8,63,0.010922666639089584
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,float16,127,0.010591999938090643
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,float16,127,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,fp8,127,0.008746666833758354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,fp8,127,0.010213333492477735
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,float16,255,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,float16,255,0.008805333326260248
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,fp8,255,0.010885333021481832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,fp8,255,0.010928000013033548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,float16,1023,0.009125333279371262
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,float16,511,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,float16,511,0.009695999945203463
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,fp8,511,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,float16,2047,0.012725333372751871
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,fp8,511,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,fp8,2047,0.012709333250919977
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,float16,1023,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,fp8,1023,0.01090666651725769
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,fp8,1023,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,float16,2047,0.014074667046467463
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,fp8,2047,0.014826666563749313
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,float16,4095,0.011642667154471079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,float16,4095,0.01703466723362605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,fp8,4095,0.012847999731699625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,fp8,4095,0.015119999647140503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,float16,8191,0.011221333096424738
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,float16,8191,0.01695999999841054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,fp8,8191,0.012837332983811697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,fp8,8191,0.017136000096797943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,float16,16383,0.013002666334311167
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,float16,16383,0.021029333273569744
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,fp8,16383,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,fp8,16383,0.019018666197856266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,float16,32767,0.01293333371480306
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,float16,32767,0.025386666258176167
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,fp8,32767,0.023370665808518726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,fp8,32767,0.011589333415031433
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,float16,65535,0.01322666679819425
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,float16,65535,0.04182933270931244
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,float16,1,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,fp8,65535,0.03156266609827677
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,fp8,65535,0.012981332838535309
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,float16,1,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,fp8,1,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,fp8,1,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,float16,3,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,fp8,3,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,float16,3,0.010602666685978571
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,float16,7,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,fp8,3,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,float16,7,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,fp8,7,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,fp8,7,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,float16,15,0.009354666496316591
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,float16,131071,0.014885333677132925
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,fp8,15,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,float16,15,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,fp8,131071,0.05479466418425242
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,fp8,15,0.010938666760921478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,64,0,1,float16,float16,131071,0.06373866895834605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,float16,31,0.009354666496316591
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,64,128,1,float16,fp8,131071,0.01301866645614306
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,float16,31,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,fp8,31,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,fp8,31,0.01055466632048289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,float16,63,0.0106133334338665
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,float16,63,0.009045333291093508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,fp8,63,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,fp8,63,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,float16,127,0.010885333021481832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,float16,127,0.008976000050703684
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,fp8,127,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,fp8,127,0.010575999816258749
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,float16,255,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,float16,255,0.009445333232482275
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,fp8,255,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,fp8,255,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,float16,511,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,float16,511,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,fp8,1023,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,fp8,511,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,fp8,511,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,float16,1023,0.009002666920423508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,float16,1023,0.011109333485364914
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,fp8,1023,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,float16,2047,0.012842666357755661
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,float16,2047,0.012986666212479273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,fp8,2047,0.01292266696691513
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,fp8,2047,0.014901333798964819
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,float16,8191,0.01268799975514412
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,float16,4095,0.012746666868527731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,float16,4095,0.015168000012636185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,fp8,4095,0.012752000242471695
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,fp8,4095,0.015205333630243937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,float16,8191,0.019120000302791595
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,fp8,8191,0.011754666765530905
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,fp8,8191,0.01758933315674464
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,float16,16383,0.013002666334311167
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,float16,16383,0.023472001155217487
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,fp8,16383,0.011802667131026586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,fp8,16383,0.023050665855407715
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,float16,32767,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,float16,32767,0.03955200066169103
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,fp8,32767,0.014650666465361914
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,fp8,32767,0.031290667752424874
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,float16,65535,0.012847999731699625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,float16,65535,0.0639466643333435
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,fp8,65535,0.012826666235923767
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,fp8,65535,0.054117331902186074
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,float16,1,0.008901333436369896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,fp8,1,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,float16,3,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,fp8,3,0.009296000003814697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,float16,7,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,fp8,7,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,float16,15,0.00914666677514712
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,fp8,15,0.01098666712641716
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,float16,31,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,fp8,31,0.009642666826645533
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,float16,63,0.009754666437705358
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,fp8,63,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,float16,127,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,fp8,127,0.010480000327030817
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,float16,255,0.010640000303586325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,float16,131071,0.013173333058754602
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,fp8,255,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,float16,511,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,fp8,511,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,float16,131071,0.11254933476448059
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,64,128,1,float16,fp8,131071,0.0141546664138635
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,64,0,1,float16,fp8,131071,0.0930560032526652
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,float16,1023,0.011616000284751257
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,fp8,1023,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,float16,2047,0.015082667271296183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,fp8,2047,0.014853333433469137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,fp8,8191,0.019333332777023315
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,float16,4095,0.017290666699409485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,fp8,4095,0.01687466725707054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,float16,8191,0.021359999974568684
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,float16,16383,0.03957333415746689
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,fp8,16383,0.025050667424996693
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,float16,1,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,float16,32767,0.05986666679382324
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,fp8,32767,0.04021333406368891
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,fp8,1,0.01090666651725769
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,float16,3,0.010570666442314783
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,fp8,3,0.010634666929642359
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,float16,7,0.01009599988659223
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,fp8,7,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,float16,15,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,float16,31,0.009717333440979322
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,fp8,15,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,float16,65535,0.10223999619483948
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,fp8,65535,0.06192533175150553
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,fp8,31,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,float16,63,0.011018666128317514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,fp8,63,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,float16,127,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,fp8,127,0.010165333126982054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,float16,255,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,fp8,255,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,float16,511,0.01116266722480456
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,fp8,511,0.01097600037852923
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,fp8,2047,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,float16,1023,0.013066666821638743
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,fp8,1023,0.012789333860079447
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,float16,2047,0.01724799970785777
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,float16,4095,0.02125866711139679
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,fp8,4095,0.019343999524911244
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,fp8,131071,0.10764267047246297
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,float16,8191,0.039674667020638786
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,1,128,0,1,float16,float16,131071,0.18811200062433878
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,fp8,8191,0.02499199906984965
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,float16,16383,0.06011199951171875
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,fp8,16383,0.039818666875362396
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,float16,32767,0.10170666376749675
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,float16,1,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,fp8,32767,0.06217599908510844
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,fp8,1,0.009066666786869368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,float16,3,0.009519999846816063
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,fp8,3,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,float16,7,0.00915733352303505
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,float16,15,0.009599999835093817
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,fp8,7,0.009029333169261614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,float16,65535,0.18518932660420737
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,fp8,15,0.010944000134865442
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,float16,31,0.008922666932145754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,fp8,31,0.010650667051474253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,float16,63,0.008810666700204214
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,fp8,65535,0.10534399747848511
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,fp8,63,0.009808000177145004
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,float16,127,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,fp8,127,0.009397333487868309
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,fp8,511,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,float16,511,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,float16,255,0.00973866693675518
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,fp8,255,0.009082666908701261
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,float16,1023,0.01110400011142095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,fp8,1023,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,float16,2047,0.011018666128317514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,fp8,2047,0.01118933285276095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,float16,4095,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,fp8,4095,0.012128000458081564
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,float16,8191,0.015930666277805965
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,float16,131071,0.3543573220570882
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,fp8,8191,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,4,2,128,0,1,float16,fp8,131071,0.1951786677042643
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,float16,16383,0.01932799940307935
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,fp8,16383,0.01725333308180173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,float16,32767,0.028783999383449554
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,fp8,32767,0.027317332724730175
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,float16,1,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,fp8,1,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,float16,65535,0.03147733211517334
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,float16,3,0.009189333145817121
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,fp8,3,0.010426666587591171
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,fp8,7,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,fp8,65535,0.02994133283694585
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,float16,7,0.009125333279371262
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,float16,31,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,fp8,31,0.011055999745925268
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,float16,15,0.009029333169261614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,fp8,15,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,fp8,131071,0.03331733246644338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,float16,63,0.008805333326260248
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,fp8,63,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,1,128,0,1,float16,float16,131071,0.035573333501815796
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,float16,127,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,fp8,127,0.00892800030608972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,float16,255,0.010415999839703241
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,fp8,255,0.009445333232482275
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,float16,2047,0.01097600037852923
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,float16,511,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,fp8,511,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,fp8,4095,0.01098666712641716
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,float16,1023,0.0106133334338665
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,fp8,1023,0.010970667004585266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,fp8,2047,0.012266666938861212
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,float16,4095,0.011125333607196808
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,float16,8191,0.016261332978804905
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,fp8,8191,0.014906667172908783
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,float16,16383,0.02117866774400075
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,fp8,16383,0.01913600042462349
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,float16,32767,0.022154666483402252
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,fp8,32767,0.02309866746266683
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,float16,1,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,float16,65535,0.025424001117547352
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,fp8,65535,0.025290665527184803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,fp8,1,0.00884799969693025
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,float16,3,0.009008000294367472
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,fp8,3,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,float16,7,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,fp8,7,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,fp8,31,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,float16,15,0.009839999799927076
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,fp8,15,0.008912000184257826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,float16,31,0.008890666688481966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,float16,63,0.009525333220760027
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,float16,255,0.008965333302815756
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,float16,131071,0.03088533381621043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,fp8,63,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,4,2,128,0,1,float16,fp8,131071,0.027562665442625683
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,float16,127,0.009413333609700203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,fp8,127,0.009514666472872099
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,fp8,255,0.009226666763424873
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,float16,511,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,fp8,511,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,float16,1023,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,float16,8191,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,fp8,1023,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,float16,2047,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,float16,16383,0.019551999866962433
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,fp8,2047,0.011120000233252844
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,float16,4095,0.012831999609867731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,fp8,4095,0.011482667177915573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,fp8,8191,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,fp8,16383,0.021045332153638203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,float16,32767,0.023472001155217487
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,float16,65535,0.025258667767047882
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,float16,1,0.00973866693675518
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,fp8,32767,0.022101332743962605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,fp8,1,0.009898666913310686
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,float16,7,0.00890666681031386
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,fp8,65535,0.02478400121132533
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,float16,3,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,fp8,3,0.009130666653315226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,fp8,7,0.009173333023985228
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,float16,15,0.009455999980370203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,fp8,15,0.010575999816258749
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,float16,131071,0.03143999973932902
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,float16,31,0.009674666449427605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,fp8,31,0.010090666512648264
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,float16,255,0.010650667051474253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,float16,63,0.008992000172535578
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,float16,511,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,1,128,0,1,float16,fp8,131071,0.029146666328112285
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,fp8,63,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,float16,127,0.009850666547815004
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,fp8,127,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,fp8,255,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,fp8,511,0.010634666929642359
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,float16,1023,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,fp8,1023,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,float16,2047,0.012117333710193634
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,fp8,2047,0.011071999867757162
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,float16,4095,0.012837332983811697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,fp8,4095,0.012432000289360682
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,float16,8191,0.016949333250522614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,fp8,8191,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,float16,16383,0.019253333409627277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,fp8,32767,0.021216000119845074
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,fp8,16383,0.01926933353145917
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,float16,32767,0.022842665513356526
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,float16,1,0.009338666374484697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,float16,65535,0.024842667082945507
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,fp8,65535,0.02125866711139679
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,fp8,1,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,float16,3,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,fp8,3,0.010469333579142889
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,float16,7,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,fp8,7,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,float16,31,0.009338666374484697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,float16,15,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,float16,63,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,fp8,15,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,fp8,31,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,float16,131071,0.044495999813079834
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,float16,255,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,fp8,63,0.010629333555698395
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,4,2,128,0,1,float16,fp8,131071,0.029466666281223297
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,fp8,511,0.011285333583752314
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,float16,127,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,fp8,127,0.010954666882753372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,fp8,255,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,float16,511,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,float16,1023,0.012800000607967377
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,fp8,1023,0.012853333105643591
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,float16,2047,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,fp8,2047,0.017024000485738117
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,float16,4095,0.021146667500336964
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,fp8,4095,0.01918399954835574
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,float16,8191,0.039605334401130676
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,fp8,8191,0.025445332129796345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,float16,16383,0.06060799956321716
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,fp8,16383,0.03972266614437103
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,float16,1,0.010922666639089584
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,fp8,1,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,float16,3,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,float16,32767,0.10197333494822185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,fp8,7,0.01091733326514562
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,fp8,32767,0.0627040018637975
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,fp8,3,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,float16,31,0.010992000500361124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,float16,7,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,float16,15,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,fp8,15,0.01101333275437355
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,fp8,31,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,float16,63,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,fp8,63,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,fp8,255,0.010885333021481832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,float16,127,0.010992000500361124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,fp8,127,0.0106133334338665
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,float16,255,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,fp8,65535,0.10600533088048299
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,float16,511,0.011727999895811081
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,fp8,511,0.011087999989589056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,1,128,0,1,float16,float16,65535,0.1869973341623942
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,float16,1023,0.012970666090647379
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,fp8,1023,0.012863999853531519
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,fp8,4095,0.02478400121132533
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,float16,2047,0.01937599976857503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,fp8,2047,0.01794133335351944
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,float16,4095,0.03874133278926214
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,float16,8191,0.05991999804973602
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,fp8,8191,0.039450667798519135
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,float16,16383,0.10040000081062317
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,fp8,16383,0.05983999868233999
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,fp8,1,0.009919999788204828
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,float16,1,0.010026666646202406
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,float16,3,0.009690666571259499
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,fp8,3,0.008933333059151968
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,float16,7,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,float16,32767,0.18488534291585287
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,fp8,7,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,fp8,32767,0.10166933139165242
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,float16,15,0.009749333063761393
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,fp8,15,0.009077333534757296
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,float16,31,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,fp8,31,0.00898133342464765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,float16,63,0.010464000205198923
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,fp8,63,0.00997866690158844
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,float16,127,0.009039999917149544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,fp8,127,0.009066666786869368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,float16,65535,0.3492853244145711
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,float16,255,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,4,2,128,0,1,float16,fp8,65535,0.18702934185663858
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,fp8,255,0.009039999917149544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,fp8,1023,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,float16,511,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,fp8,511,0.01109333336353302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,float16,1023,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,float16,2047,0.012815999488035837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,fp8,2047,0.012794667234023413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,float16,4095,0.012965332716703415
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,float16,16383,0.020224000016848247
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,fp8,4095,0.01322666679819425
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,float16,8191,0.017231999586025875
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,fp8,8191,0.018842666099468868
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,fp8,32767,0.021312000850836437
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,fp8,16383,0.019296000401178997
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,float16,32767,0.022970666488011677
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,float16,1,0.00938666673998038
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,fp8,1,0.010415999839703241
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,float16,65535,0.02537599951028824
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,float16,3,0.010645333677530289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,fp8,65535,0.023381332556406658
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,fp8,3,0.009850666547815004
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,float16,7,0.00903466654320558
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,fp8,7,0.009242666885256767
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,float16,15,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,fp8,15,0.009877333417534828
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,float16,31,0.009690666571259499
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,fp8,31,0.010650667051474253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,float16,63,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,float16,131071,0.04696533580621084
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,fp8,63,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,float16,127,0.00980266680320104
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,float16,511,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,1,128,0,1,float16,fp8,131071,0.031311998764673867
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,fp8,127,0.00972800018886725
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,fp8,1023,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,float16,255,0.009765333185593287
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,fp8,255,0.010656000425418219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,fp8,511,0.010992000500361124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,fp8,4095,0.016735999534527462
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,float16,1023,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,float16,2047,0.013210666676362356
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,fp8,2047,0.012741333494583765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,float16,4095,0.01693333312869072
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,float16,8191,0.01893866683046023
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,fp8,8191,0.01716800034046173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,fp8,16383,0.01929066702723503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,float16,16383,0.01923199991385142
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,float16,32767,0.023376000424226124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,fp8,32767,0.021344001094500225
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,float16,65535,0.042853335539499916
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,float16,1,0.011061333119869232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,fp8,1,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,float16,3,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,fp8,65535,0.02717333287000656
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,fp8,3,0.010629333555698395
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,float16,7,0.011178666104873022
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,fp8,7,0.010944000134865442
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,float16,15,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,fp8,15,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,float16,31,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,fp8,31,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,fp8,127,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,float16,63,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,fp8,63,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,float16,511,0.011141333729028702
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,float16,131071,0.06555733581384023
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,float16,127,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,4,2,128,0,1,float16,fp8,131071,0.044826666514078774
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,float16,255,0.010885333021481832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,fp8,255,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,fp8,2047,0.01894933357834816
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,fp8,511,0.011413333316644033
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,float16,1023,0.012794667234023413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,fp8,1023,0.012949333836634954
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,float16,2047,0.021520001192887623
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,float16,4095,0.039290666580200195
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,fp8,4095,0.02518933266401291
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,float16,8191,0.06022400160630544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,fp8,8191,0.03967999915281931
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,fp8,1,0.012703999876976013
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,float16,1,0.011594666788975397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,float16,16383,0.10152000188827515
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,float16,3,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,float16,7,0.012554666648308435
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,fp8,16383,0.060138667623202004
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,fp8,3,0.011541333049535751
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,fp8,7,0.010922666639089584
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,fp8,15,0.011061333119869232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,float16,15,0.012826666235923767
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,float16,31,0.011786667009194693
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,fp8,31,0.011760000139474869
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,float16,63,0.011802667131026586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,fp8,63,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,fp8,255,0.010944000134865442
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,float16,127,0.01180800050497055
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,fp8,127,0.012858666479587555
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,float16,255,0.011477333803971609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,fp8,32767,0.10296533505121867
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,1,128,0,1,float16,float16,32767,0.1852746605873108
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,float16,2047,0.03976000100374222
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,float16,511,0.014778666198253632
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,fp8,511,0.012954667210578918
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,float16,1023,0.01727466657757759
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,fp8,1023,0.016810666769742966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,fp8,2047,0.025205334027608235
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,float16,4095,0.06021333237489065
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,fp8,4095,0.04153066625197729
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,float16,8191,0.10293866197268169
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,fp8,8191,0.06165333092212677
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,float16,1,0.012847999731699625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,fp8,1,0.012122667084137598
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,float16,3,0.012730666746695837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,float16,16383,0.18478933970133463
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,fp8,3,0.011957333733638128
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,fp8,16383,0.10499733686447144
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,float16,7,0.012928000340859095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,fp8,7,0.011621333658695221
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,float16,15,0.012778667112191519
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,fp8,15,0.011258666714032492
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,float16,31,0.012837332983811697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,fp8,31,0.012741333494583765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,float16,63,0.013007999708255133
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,fp8,63,0.011877333124478659
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,float16,32767,0.354533314704895
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,float16,127,0.012757333616415659
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,fp8,127,0.012741333494583765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,4,2,128,0,1,float16,fp8,32767,0.18930667638778687
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,float16,255,0.012725333372751871
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,fp8,255,0.01180800050497055
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,float16,511,0.01479999969402949
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,fp8,511,0.01293333371480306
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,fp8,1023,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,fp8,2047,0.02514133354028066
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,float16,2047,0.03972266614437103
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,float16,1023,0.01809599995613098
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,float16,4095,0.061237335205078125
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,float16,1,0.015061333775520325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,fp8,4095,0.04041066765785217
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,fp8,1,0.013429333766301474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,float16,8191,0.10294399658838908
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,fp8,7,0.014943999548753103
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,float16,3,0.014858666807413101
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,float16,15,0.014805333067973455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,fp8,3,0.012986666212479273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,float16,31,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,float16,7,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,1,128,0,1,float16,fp8,8191,0.06185600161552429
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,fp8,15,0.012997332960367203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,fp8,31,0.014933332800865173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,float16,63,0.014778666198253632
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,fp8,63,0.01312000056107839
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,float16,127,0.014794666320085526
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,fp8,127,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,float16,255,0.014901333798964819
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,fp8,255,0.014778666198253632
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,float16,511,0.019023999571800232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,fp8,511,0.01727466657757759
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,float16,1023,0.03342933456103007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,fp8,1023,0.022474666436513264
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,float16,2047,0.06087466577688853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,fp8,2047,0.03994133323431015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,float16,4095,0.10316266616185506
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,fp8,4095,0.0637546678384145
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,float16,1,0.014864000181357065
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,float16,8191,0.18666134277979532
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,fp8,1,0.012874666601419449
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,float16,3,0.014767999450365702
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,4,2,128,0,1,float16,fp8,8191,0.10564266641934712
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,fp8,3,0.013834666460752487
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,float16,7,0.014837333311637243
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,fp8,7,0.014896000425020853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,float16,15,0.014864000181357065
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,fp8,15,0.013045333325862885
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,float16,31,0.015008000036080679
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,fp8,31,0.013093333691358566
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,float16,63,0.01482133318980535
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,fp8,63,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,float16,127,0.01488000030318896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,fp8,127,0.014826666563749313
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,fp8,511,0.01711999997496605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,float16,255,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,fp8,255,0.013866666704416275
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,fp8,1023,0.022976001103719074
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,float16,511,0.018986667195955913
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,float16,1023,0.03502399971087774
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,float16,2047,0.060975998640060425
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,fp8,2047,0.04134399940570196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,fp8,1,0.01721599946419398
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,float16,3,0.019023999571800232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,float16,4095,0.1030346651871999
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,fp8,3,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,float16,1,0.019648000597953796
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,1,128,0,1,float16,fp8,4095,0.06262399752934773
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,float16,7,0.019248000035683315
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,fp8,7,0.018954666952292126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,float16,15,0.019226666539907455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,fp8,15,0.017973333597183228
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,float16,31,0.01941866676012675
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,float16,127,0.019013332823912304
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,fp8,31,0.016976000120242436
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,float16,63,0.01942933350801468
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,fp8,63,0.017231999586025875
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,fp8,127,0.01884799947341283
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,float16,255,0.019098666807015736
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,fp8,255,0.017152000218629837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,float16,511,0.03324799984693527
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,float16,1023,0.05389333268006643
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,fp8,511,0.02306666721900304
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,fp8,1023,0.03534399966398875
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,float16,1,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,fp8,1,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,float16,2047,0.0981119970480601
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,fp8,3,0.010069333637754122
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,fp8,2047,0.06010666489601135
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,float16,3,0.008992000172535578
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,float16,7,0.009109333157539368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,fp8,7,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,float16,15,0.009434666484594345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,float16,4095,0.17484267552693686
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,4,2,128,0,1,float16,fp8,4095,0.09874133268992107
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,fp8,63,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,fp8,15,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,float16,31,0.009072000160813332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,fp8,31,0.009066666786869368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,fp8,255,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,float16,63,0.00972800018886725
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,float16,127,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,fp8,127,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,float16,255,0.009770666559537252
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,float16,511,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,fp8,511,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,float16,1023,0.011066666493813196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,fp8,4095,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,fp8,1023,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,float16,2047,0.012981332838535309
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,fp8,2047,0.01314666618903478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,float16,4095,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,float16,8191,0.0183146670460701
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,fp8,8191,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,float16,32767,0.02515200028816859
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,float16,16383,0.02126399924357732
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,fp8,16383,0.018976000448067982
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,fp8,32767,0.02117866774400075
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,float16,1,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,fp8,1,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,float16,3,0.010341333225369453
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,fp8,3,0.009402666861812273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,fp8,65535,0.02738133321205775
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,float16,65535,0.04308266441027323
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,float16,7,0.010543999572594961
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,fp8,7,0.00890666681031386
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,float16,15,0.010069333637754122
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,fp8,15,0.008885333314538002
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,float16,31,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,fp8,31,0.01097600037852923
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,float16,63,0.009045333291093508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,fp8,63,0.009285333255926767
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,fp8,255,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,float16,127,0.00966933307548364
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,float16,131071,0.06647466619809468
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,1,128,0,1,float16,fp8,131071,0.04382933179537455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,fp8,127,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,float16,255,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,float16,511,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,fp8,511,0.010645333677530289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,fp8,4095,0.016906666258970898
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,float16,1023,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,float16,8191,0.018944000204404194
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,fp8,1023,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,float16,2047,0.015226667126019796
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,fp8,2047,0.015173333386580149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,float16,4095,0.017093333105246227
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,fp8,8191,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,float16,16383,0.023381332556406658
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,fp8,16383,0.020874666670958202
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,float16,32767,0.041434665520985924
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,fp8,32767,0.02584533393383026
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,float16,65535,0.06091733276844025
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,fp8,65535,0.03967999915281931
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,128,0,1,float16,float16,1,0.020970667401949566
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,128,0,1,float16,fp8,1,0.01717866708834966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,128,0,1,float16,float16,3,0.02073066681623459
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,128,0,1,float16,fp8,3,0.019002666076024372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,128,0,1,float16,float16,7,0.020997333029905956
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,128,0,1,float16,fp8,7,0.018922666708628338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,128,0,1,float16,float16,15,0.020309332758188248
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,128,0,1,float16,fp8,15,0.017162666966517765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,128,0,1,float16,float16,31,0.02059200033545494
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,float16,131071,0.10152533650398254
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,4,2,128,0,1,float16,fp8,131071,0.06305066744486491
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,128,0,1,float16,float16,63,0.02103466788927714
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,128,0,1,float16,fp8,31,0.018842666099468868
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,128,0,1,float16,fp8,63,0.01897066707412402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,128,0,1,float16,float16,127,0.01903466631968816
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,128,0,1,float16,float16,255,0.020288000504175823
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,128,0,1,float16,fp8,127,0.017162666966517765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,128,0,1,float16,fp8,255,0.018992000569899876
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,128,0,1,float16,float16,511,0.03454933315515518
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,128,0,1,float16,fp8,511,0.023455999791622162
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,128,0,1,float16,float16,1023,0.05385066568851471
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,128,0,1,float16,float16,1,0.029130667448043823
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,128,0,1,float16,fp8,1023,0.03561066587766012
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,128,0,1,float16,fp8,1,0.027232001225153606
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,128,0,1,float16,float16,2047,0.09925333658854167
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,128,0,1,float16,float16,3,0.029530666768550873
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,1,128,0,1,float16,fp8,2047,0.060271998246510826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,128,0,1,float16,fp8,3,0.02720000098148982
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,128,0,1,float16,float16,7,0.029525332152843475
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,128,0,1,float16,fp8,7,0.0271573339899381
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,128,0,1,float16,float16,15,0.029466666281223297
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,128,0,1,float16,float16,31,0.029525332152843475
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,128,0,1,float16,fp8,15,0.025301332275072735
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,128,0,1,float16,fp8,31,0.027285332481066387
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,128,0,1,float16,float16,63,0.029498666524887085
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,128,0,1,float16,fp8,63,0.027077332139015198
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,128,0,1,float16,float16,127,0.02942399928967158
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,128,0,1,float16,fp8,127,0.027056001126766205
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,128,0,1,float16,float16,255,0.03324266771475474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,128,0,1,float16,fp8,255,0.02531733363866806
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,128,0,1,float16,float16,511,0.054101333022117615
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,128,0,1,float16,fp8,511,0.03972266614437103
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,128,0,1,float16,float16,1023,0.09339732925097148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,128,0,1,float16,fp8,1023,0.05819199979305267
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,128,0,1,float16,float16,2047,0.17686933279037476
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,128,0,1,float16,float16,1,0.029701332251230877
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,4,2,128,0,1,float16,fp8,2047,0.10094400246938069
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,128,0,1,float16,float16,3,0.031210665901501972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,128,0,1,float16,fp8,1,0.027306665976842243
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,128,0,1,float16,fp8,3,0.027488000690937042
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,128,0,1,float16,float16,7,0.031152000029881794
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,128,0,1,float16,fp8,7,0.027215999861558277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,128,0,1,float16,float16,15,0.02951466788848241
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,128,0,1,float16,fp8,15,0.027119999130566914
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,128,0,1,float16,float16,31,0.02959999938805898
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,128,0,1,float16,fp8,31,0.02717866748571396
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,128,0,1,float16,float16,63,0.03139200061559677
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,128,0,1,float16,float16,127,0.031210665901501972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,128,0,1,float16,fp8,127,0.027349332968393963
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,128,0,1,float16,float16,255,0.033786666889985405
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,128,0,1,float16,fp8,255,0.027136000494162243
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,128,0,1,float16,float16,511,0.055760001142819725
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,128,0,1,float16,fp8,511,0.03942399968703588
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,128,0,1,float16,float16,1023,0.09533333778381348
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,128,0,1,float16,float16,1,0.04994666576385498
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,128,0,1,float16,fp8,1,0.042693331837654114
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,128,0,1,float16,fp8,1023,0.0584746648867925
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,128,0,1,float16,float16,3,0.04970133304595947
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,128,0,1,float16,fp8,3,0.0436106671889623
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,128,0,1,float16,float16,7,0.04972266654173533
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,128,0,1,float16,fp8,7,0.04344533383846283
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,128,0,1,float16,fp8,15,0.04359466830889384
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,128,0,1,float16,float16,15,0.049813335140546165
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,128,0,1,float16,float16,31,0.04966933528582255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,128,0,1,float16,float16,63,0.049653331438700356
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,128,0,1,float16,fp8,63,0.043477331598599754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,128,0,1,float16,float16,127,0.0510506679614385
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,1,128,0,1,float16,fp8,63,0.0271573339899381
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,128,0,1,float16,fp8,127,0.04343999922275543
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,128,0,1,float16,float16,255,0.055311997731526695
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,128,0,1,float16,fp8,255,0.045642669002215065
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,float16,1,0.010922666639089584
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,fp8,1,0.008986666798591614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,128,0,1,float16,float16,511,0.0950986643632253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,float16,3,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,128,0,1,float16,fp8,511,0.06606400012969971
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,fp8,3,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,float16,7,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,fp8,7,0.011098666737476984
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,float16,15,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,fp8,15,0.010981333752473196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,float16,31,0.009093333035707474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,fp8,31,0.009290666629870733
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,float16,127,0.01062400018175443
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,float16,63,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,fp8,63,0.009423999736706415
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,128,0,1,float16,float16,1023,0.17245866854985556
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,fp8,127,0.009045333291093508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,128,0,1,float16,fp8,1023,0.10276266932487488
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,float16,255,0.00903466654320558
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,float16,1023,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,fp8,255,0.010879999647537867
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,float16,511,0.010965333630641302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,fp8,511,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,fp8,1023,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,float16,2047,0.014848000059525171
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,fp8,2047,0.015040000279744467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,float16,4095,0.017157333592573803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,fp8,4095,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,float16,8191,0.019413333386182785
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,fp8,8191,0.01700266698996226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,float16,16383,0.023317334552605946
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,fp8,16383,0.02083733429511388
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,4,2,128,0,1,float16,fp8,31,0.04366933306058248
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,float16,32767,0.04140799989302953
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,fp8,32767,0.025392000873883564
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,float16,1,0.008890666688481966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,fp8,1,0.010656000425418219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,float16,65535,0.06154666841030121
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,float16,7,0.008922666932145754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,float16,3,0.008799999952316284
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,fp8,3,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,fp8,65535,0.04148799926042557
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,fp8,7,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,float16,15,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,fp8,15,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,float16,31,0.010656000425418219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,fp8,31,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,float16,63,0.01055466632048289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,fp8,127,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,fp8,63,0.010608000059922537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,float16,127,0.009690666571259499
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,float16,255,0.010090666512648264
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,fp8,131071,0.06234666705131531
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,fp8,255,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,float16,511,0.010911999891201654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,1,128,0,1,float16,float16,131071,0.10267200072606404
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,float16,2047,0.015279999623696009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,fp8,511,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,float16,1023,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,fp8,4095,0.016837333639462788
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,fp8,1023,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,float16,8191,0.02107200026512146
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,fp8,2047,0.014730667074521383
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,float16,4095,0.017045332739750545
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,fp8,8191,0.019039999693632126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,fp8,16383,0.024959998826185863
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,float16,16383,0.03875733415285746
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,float16,32767,0.059263999263445534
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,fp8,32767,0.039493332306543984
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,float16,1,0.010949333508809408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,float16,1,0.010645333677530289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,fp8,1,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,fp8,1,0.009423999736706415
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,float16,3,0.009402666861812273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,float16,65535,0.10076799988746643
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,float16,3,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,fp8,3,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,float16,7,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,fp8,3,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,fp8,65535,0.06227200229962667
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,float16,7,0.00919999989370505
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,fp8,7,0.010933333386977514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,fp8,7,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,float16,15,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,float16,15,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,float16,31,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,fp8,31,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,fp8,15,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,float16,131071,0.18791999419530234
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,fp8,15,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,4,2,128,0,1,float16,fp8,131071,0.10757866501808167
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,float16,31,0.010650667051474253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,fp8,31,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,float16,63,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,float16,63,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,fp8,63,0.009349333122372627
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,fp8,63,0.0106133334338665
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,float16,127,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,fp8,255,0.010106666634480158
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,float16,127,0.01089599976936976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,fp8,127,0.010405333091815313
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,fp8,127,0.01090666651725769
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,float16,255,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,float16,255,0.010458666831254959
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,fp8,255,0.010911999891201654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,float16,1023,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,float16,511,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,float16,511,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,fp8,511,0.010933333386977514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,float16,2047,0.014858666807413101
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,fp8,511,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,float16,1023,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,fp8,1023,0.010981333752473196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,fp8,1023,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,float16,2047,0.012842666357755661
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,fp8,2047,0.012800000607967377
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,fp8,2047,0.014752000570297241
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,float16,4095,0.012725333372751871
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,float16,4095,0.01525866612792015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,fp8,4095,0.012986666212479273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,fp8,8191,0.017221332838137943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,fp8,4095,0.015050667027632395
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,float16,8191,0.012736000120639801
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,float16,8191,0.01899733394384384
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,fp8,8191,0.012773333738247553
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,float16,16383,0.012655999511480331
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,float16,16383,0.024645333488782246
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,fp8,16383,0.012853333105643591
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,fp8,16383,0.02274666726589203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,float16,32767,0.012810666114091873
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,float16,32767,0.039919999738534294
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,fp8,32767,0.012741333494583765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,fp8,32767,0.03233066697915395
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,float16,65535,0.012661332885424295
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,float16,65535,0.06435733536879222
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,fp8,65535,0.012901333471139273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,float16,1,0.009008000294367472
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,float16,1,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,fp8,65535,0.05392000079154968
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,fp8,1,0.010357333347201347
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,fp8,1,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,float16,3,0.00898133342464765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,float16,3,0.00895999992887179
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,fp8,3,0.011018666128317514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,fp8,3,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,float16,7,0.008752000207702318
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,float16,7,0.011365332951148352
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,fp8,7,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,fp8,7,0.00895999992887179
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,float16,15,0.011066666493813196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,float16,15,0.008992000172535578
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,fp8,15,0.00878399983048439
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,float16,131071,0.013482666263977686
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,float16,31,0.008943999807039896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,fp8,15,0.01128000020980835
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,float16,131071,0.11312533418337505
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,float16,31,0.008986666798591614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,fp8,31,0.008992000172535578
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,64,0,1,float16,fp8,131071,0.09477866689364116
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,fp8,31,0.00897066667675972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,64,128,1,float16,fp8,131071,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,float16,63,0.011247999966144562
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,float16,63,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,fp8,63,0.008890666688481966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,fp8,63,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,float16,127,0.00871999996403853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,float16,255,0.00901333304742972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,fp8,127,0.009039999917149544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,float16,127,0.00956266683836778
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,fp8,127,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,float16,255,0.009039999917149544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,fp8,255,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,fp8,255,0.00895999992887179
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,float16,1023,0.008938666433095932
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,float16,511,0.009695999945203463
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,float16,511,0.008992000172535578
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,fp8,511,0.011098666737476984
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,fp8,511,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,float16,1023,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,fp8,1023,0.009637333452701569
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,fp8,1023,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,float16,2047,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,float16,2047,0.009808000177145004
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,fp8,2047,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,fp8,2047,0.011066666493813196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,float16,4095,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,fp8,8191,0.009039999917149544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,float16,4095,0.011221333096424738
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,fp8,4095,0.009679999823371569
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,fp8,4095,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,float16,8191,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,float16,8191,0.014912000546852747
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,fp8,16383,0.01915733392039935
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,fp8,8191,0.015589332828919092
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,float16,16383,0.008954666554927826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,float16,16383,0.01904533306757609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,fp8,16383,0.010981333752473196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,float16,32767,0.009877333417534828
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,float16,32767,0.025514667232831318
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,fp8,32767,0.00873066671192646
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,fp8,32767,0.025466665625572205
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,float16,65535,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,float16,65535,0.029391999046007793
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,fp8,65535,0.03051200012365977
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,fp8,65535,0.009162666896979014
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,float16,1,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,float16,1,0.00895999992887179
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,fp8,1,0.010618666807810465
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,fp8,1,0.008837333569924036
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,float16,3,0.008949333180983862
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,float16,3,0.010981333752473196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,fp8,3,0.010581333190202713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,fp8,3,0.009082666908701261
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,float16,7,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,fp8,131071,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,float16,7,0.010533332824707031
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,fp8,131071,0.03148799886306127
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,64,128,1,float16,float16,131071,0.011952000359694162
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,fp8,7,0.008986666798591614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,fp8,7,0.00897066667675972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,64,0,1,float16,float16,131071,0.03339733431736628
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,float16,15,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,float16,15,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,fp8,15,0.009712000067035357
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,fp8,15,0.009077333534757296
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,float16,31,0.009706666693091393
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,float16,31,0.008901333436369896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,fp8,31,0.009109333157539368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,fp8,31,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,float16,63,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,fp8,127,0.008912000184257826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,float16,63,0.009072000160813332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,fp8,63,0.009450666606426239
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,fp8,63,0.00891733355820179
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,float16,127,0.008837333569924036
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,float16,127,0.009050666665037474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,fp8,127,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,float16,255,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,float16,255,0.009114666531483332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,fp8,255,0.009183999771873156
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,fp8,255,0.009765333185593287
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,float16,1023,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,float16,511,0.008885333314538002
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,fp8,1023,0.010213333492477735
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,float16,511,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,float16,2047,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,fp8,511,0.009754666437705358
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,fp8,2047,0.010922666639089584
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,fp8,511,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,float16,1023,0.009119999905427298
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,fp8,1023,0.010890666395425797
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,float16,2047,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,fp8,2047,0.01110400011142095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,float16,4095,0.010911999891201654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,float16,4095,0.011146667102972666
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,fp8,4095,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,fp8,4095,0.011701333026091257
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,float16,8191,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,float16,8191,0.015114666273196539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,fp8,8191,0.01002133327225844
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,fp8,8191,0.01522133375207583
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,float16,16383,0.010992000500361124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,float16,16383,0.019093333433071773
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,fp8,16383,0.010981333752473196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,fp8,16383,0.018981333822011948
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,float16,32767,0.011061333119869232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,fp8,32767,0.011194666226704916
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,float16,32767,0.021274665991465252
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,fp8,32767,0.02161066730817159
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,float16,65535,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,fp8,65535,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,float16,65535,0.023711999257405598
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,fp8,65535,0.023237332701683044
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,float16,1,0.009077333534757296
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,float16,1,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,fp8,1,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,fp8,1,0.010933333386977514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,float16,3,0.009130666653315226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,float16,131071,0.013167999684810638
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,float16,3,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,float16,7,0.009653333574533463
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,fp8,3,0.010885333021481832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,fp8,3,0.011077333241701126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,64,128,1,float16,fp8,131071,0.011477333803971609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,float16,131071,0.027530667682488758
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,float16,7,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,fp8,7,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,fp8,7,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,float16,15,0.009119999905427298
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,float16,31,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,float16,15,0.010992000500361124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,64,0,1,float16,fp8,131071,0.028746667007605236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,fp8,15,0.011125333607196808
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,float16,63,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,fp8,15,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,float16,31,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,fp8,31,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,fp8,31,0.010901333143313726
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,float16,63,0.010165333126982054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,fp8,63,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,fp8,63,0.010255999863147736
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,float16,127,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,float16,127,0.011120000233252844
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,fp8,255,0.009253333633144697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,fp8,127,0.011061333119869232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,fp8,127,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,fp8,511,0.010650667051474253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,float16,255,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,float16,255,0.009957333405812582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,fp8,255,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,float16,511,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,float16,511,0.010911999891201654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,fp8,511,0.011077333241701126
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,float16,1023,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,float16,1023,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,fp8,1023,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,fp8,1023,0.012784000486135483
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,float16,4095,0.013088000317414602
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,float16,2047,0.012650666137536367
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,float16,2047,0.01621333385507266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,fp8,4095,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,fp8,2047,0.013066666821638743
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,float16,8191,0.01421333352724711
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,fp8,2047,0.015226667126019796
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,float16,4095,0.018965333700180054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,fp8,8191,0.023056000471115112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,fp8,4095,0.012874666601419449
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,float16,8191,0.024256000916163128
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,fp8,8191,0.012853333105643591
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,float16,16383,0.013088000317414602
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,float16,16383,0.041706666350364685
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,fp8,16383,0.013045333325862885
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,fp8,16383,0.03148799886306127
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,float16,32767,0.013093333691358566
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,float16,32767,0.0643039991458257
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,fp8,32767,0.013002666334311167
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,fp8,32767,0.05401599903901418
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,float16,65535,0.11044800281524658
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,float16,65535,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,fp8,65535,0.012752000242471695
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,fp8,65535,0.09264000256856282
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,float16,1,0.00898133342464765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,float16,1,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,fp8,1,0.008933333059151968
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,fp8,1,0.010474666953086853
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,float16,3,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,float16,3,0.008943999807039896
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,fp8,3,0.011477333803971609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,fp8,3,0.01090666651725769
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,float16,7,0.009050666665037474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,float16,7,0.00898133342464765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,fp8,7,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,fp8,7,0.008986666798591614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,float16,15,0.00890666681031386
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,float16,15,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,fp8,15,0.009162666896979014
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,fp8,15,0.00891733355820179
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,float16,31,0.010725333044926325
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,float16,31,0.008976000050703684
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,fp8,31,0.009061333412925402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,fp8,31,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,float16,63,0.00903466654320558
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,float16,63,0.009722666814923286
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,fp8,63,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,fp8,63,0.0103946669648091
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,float16,127,0.009754666437705358
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,float16,127,0.00895999992887179
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,fp8,127,0.009541333342591921
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,fp8,127,0.009621333330869675
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,float16,255,0.00966933307548364
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,float16,255,0.009045333291093508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,fp8,255,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,fp8,255,0.010090666512648264
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,float16,511,0.011584000041087469
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,float16,131071,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,float16,511,0.010634666929642359
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,fp8,511,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,64,128,1,float16,fp8,131071,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,fp8,511,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,float16,1023,0.010421333213647207
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,float16,131071,0.205402672290802
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,float16,1023,0.010533332824707031
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,fp8,1023,0.008938666433095932
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,fp8,1023,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,fp8,2047,0.012149333953857422
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,64,0,1,float16,fp8,131071,0.16889599959055582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,float16,2047,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,fp8,4095,0.011802667131026586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,float16,2047,0.012858666479587555
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,fp8,2047,0.012026666353146235
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,float16,8191,0.01110400011142095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,float16,4095,0.012821332861979803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,float16,4095,0.011034666250149408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,fp8,8191,0.016965333372354507
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,fp8,4095,0.012810666114091873
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,float16,8191,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,fp8,8191,0.012741333494583765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,float16,16383,0.019381333142518997
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,float16,16383,0.01109333336353302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,fp8,16383,0.011850666254758835
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,fp8,16383,0.019109333554903667
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,float16,32767,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,float16,32767,0.021087999145189922
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,fp8,32767,0.011706666400035223
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,fp8,32767,0.021242665747801464
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,float16,65535,0.011136000355084738
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,float16,65535,0.023215999205907185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,fp8,65535,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,float16,1,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,fp8,65535,0.023050665855407715
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,float16,1,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,fp8,1,0.011066666493813196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,fp8,1,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,float16,3,0.01090666651725769
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,fp8,3,0.010944000134865442
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,float16,3,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,fp8,3,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,float16,7,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,fp8,131071,0.014746667196353277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,float16,7,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,fp8,7,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,float16,131071,0.029365333418051403
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,64,128,1,float16,float16,131071,0.01301866645614306
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,fp8,15,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,fp8,7,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,float16,15,0.0107893335322539
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,64,0,1,float16,fp8,131071,0.027552001178264618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,float16,15,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,fp8,15,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,float16,31,0.010037333394090334
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,fp8,31,0.011018666128317514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,fp8,63,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,float16,31,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,fp8,31,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,float16,63,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,fp8,127,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,float16,63,0.010869332899649939
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,fp8,63,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,float16,127,0.010645333677530289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,float16,127,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,fp8,127,0.01091733326514562
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,float16,511,0.010538666198650995
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,float16,255,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,float16,255,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,fp8,255,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,fp8,255,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,float16,511,0.01110400011142095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,fp8,511,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,fp8,511,0.01090666651725769
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,float16,1023,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,float16,1023,0.012768000364303589
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,float16,2047,0.017279999951521557
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,fp8,1023,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,fp8,1023,0.012837332983811697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,float16,2047,0.012762666990359625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,fp8,2047,0.013045333325862885
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,fp8,2047,0.01718933383623759
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,float16,4095,0.012890666723251343
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,float16,4095,0.02327999969323476
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,fp8,4095,0.012917333592971167
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,fp8,4095,0.023168000082174938
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,float16,8191,0.012981332838535309
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,float16,8191,0.03892799963553747
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,fp8,8191,0.01321600005030632
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,fp8,8191,0.03146666785081228
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,float16,16383,0.012831999609867731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,fp8,16383,0.012874666601419449
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,float16,16383,0.06225066880385081
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,fp8,16383,0.05392533540725708
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,float16,32767,0.10708799958229065
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,float16,32767,0.012639999389648438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,fp8,32767,0.013114667187134424
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,fp8,32767,0.09262933333714803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,float16,1,0.01110400011142095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,float16,1,0.011205332974592844
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,fp8,1,0.011141333729028702
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,fp8,1,0.010874666273593903
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,float16,3,0.01201066623131434
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,float16,3,0.011178666104873022
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,fp8,3,0.011312000453472137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,fp8,3,0.011157333850860596
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,float16,7,0.012154666086037954
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,float16,7,0.011770666887362799
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,fp8,7,0.011349332829316458
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,fp8,7,0.011141333729028702
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,float16,15,0.0120319997270902
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,float16,15,0.011493333925803503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,fp8,15,0.011413333316644033
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,fp8,15,0.011152000476916632
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,float16,31,0.011130666981140772
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,float16,31,0.011125333607196808
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,fp8,31,0.011472000430027643
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,fp8,31,0.011391999820868174
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,float16,63,0.011744000017642975
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,float16,63,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,fp8,63,0.011418666690587997
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,fp8,63,0.011706666400035223
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,float16,127,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,float16,127,0.01102399950226148
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,fp8,127,0.011071999867757162
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,fp8,127,0.011253333340088526
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,float16,65535,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,float16,255,0.011061333119869232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,float16,255,0.011173332730929056
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,64,128,1,float16,fp8,65535,0.012847999731699625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,fp8,255,0.011898666620254517
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,fp8,255,0.011215999722480774
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,float16,65535,0.19708265860875449
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,float16,511,0.011125333607196808
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,64,0,1,float16,fp8,65535,0.16884267330169678
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,float16,1023,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,float16,511,0.012960000584522883
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,fp8,511,0.011226666470368704
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,fp8,511,0.012805332740147909
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,fp8,1023,0.01110400011142095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,float16,1023,0.017045332739750545
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,float16,2047,0.014954666296641031
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,fp8,1023,0.015285332997639975
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,fp8,2047,0.023258666197458904
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,float16,2047,0.02442666639884313
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,fp8,2047,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,float16,4095,0.01551466683546702
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,float16,4095,0.0406986673672994
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,fp8,4095,0.014965333044528961
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,fp8,4095,0.03331200033426285
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,float16,8191,0.014959999670584997
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,fp8,8191,0.014778666198253632
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,float16,8191,0.06322666505972545
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,fp8,8191,0.05595199763774872
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,float16,1,0.012991999586423239
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,float16,1,0.013173333058754602
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,fp8,1,0.013007999708255133
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,fp8,1,0.013088000317414602
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,float16,3,0.013183999806642532
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,float16,16383,0.01479999969402949
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,float16,16383,0.10718400279680888
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,float16,3,0.014912000546852747
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,fp8,3,0.012928000340859095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,64,128,1,float16,fp8,16383,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,fp8,3,0.013077333569526672
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,64,0,1,float16,fp8,16383,0.09417066971460979
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,float16,7,0.013088000317414602
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,fp8,7,0.013786666095256805
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,float16,7,0.013034666577974955
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,float16,15,0.013669333110253016
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,fp8,7,0.012906666845083237
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,float16,15,0.013994666437307993
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,fp8,15,0.012928000340859095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,fp8,15,0.014122666170199713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,float16,31,0.013866666704416275
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,float16,31,0.013093333691358566
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,fp8,31,0.013376000026861826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,fp8,31,0.012901333471139273
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,float16,63,0.012890666723251343
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,float16,63,0.013056000073750814
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,fp8,63,0.01471466695268949
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,fp8,63,0.012890666723251343
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,float16,127,0.014858666807413101
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,float16,127,0.013471999516089758
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,fp8,127,0.012917333592971167
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,float16,255,0.013162666310866674
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,fp8,127,0.01301866645614306
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,fp8,255,0.014181333283583323
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,float16,255,0.012975999464591345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,fp8,255,0.013173333058754602
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,float16,511,0.017269333203633625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,float16,511,0.012879999975363413
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,fp8,511,0.013023999830087027
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,fp8,511,0.01609066625436147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,float16,1023,0.014885333677132925
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,float16,1023,0.02309333284695943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,fp8,1023,0.013130666067202887
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,fp8,1023,0.02145066608985265
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,float16,2047,0.016757333030303318
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,float16,2047,0.04113066693147024
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,fp8,2047,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,fp8,2047,0.033600000043710075
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,float16,4095,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,float16,4095,0.06403199831644694
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,float16,1,0.010938666760921478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,fp8,4095,0.017024000485738117
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,fp8,4095,0.0566293348868688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,float16,1,0.010885333021481832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,fp8,1,0.01090666651725769
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,fp8,1,0.009093333035707474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,float16,3,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,float16,3,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,fp8,3,0.010938666760921478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,fp8,3,0.008863999818762144
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,fp8,8191,0.09458667039871216
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,64,0,1,float16,float16,8191,0.10842133561770122
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,float16,7,0.010618666807810465
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,float16,7,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,float16,8191,0.016773333152135212
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,64,128,1,float16,fp8,8191,0.017045332739750545
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,fp8,7,0.010890666395425797
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,fp8,7,0.009189333145817121
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,float16,15,0.009066666786869368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,float16,15,0.008954666554927826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,fp8,15,0.010186666622757912
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,fp8,15,0.010981333752473196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,float16,31,0.009109333157539368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,float16,31,0.008954666554927826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,fp8,31,0.009183999771873156
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,fp8,31,0.008949333180983862
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,float16,63,0.00890666681031386
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,float16,63,0.009152000149091085
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,fp8,127,0.009658666948477427
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,fp8,63,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,fp8,63,0.01101333275437355
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,float16,127,0.009008000294367472
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,float16,127,0.008938666433095932
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,fp8,127,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,float16,255,0.009296000003814697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,float16,255,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,fp8,255,0.010037333394090334
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,fp8,255,0.00973866693675518
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,float16,511,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,float16,511,0.009824000298976898
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,fp8,511,0.009269333134094873
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,fp8,511,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,float16,1023,0.009216000015536943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,fp8,2047,0.010992000500361124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,float16,1023,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,fp8,1023,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,fp8,1023,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,float16,2047,0.011018666128317514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,float16,2047,0.012815999488035837
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,fp8,2047,0.012826666235923767
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,float16,4095,0.012725333372751871
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,float16,4095,0.014837333311637243
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,fp8,4095,0.012975999464591345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,fp8,4095,0.015178666760524115
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,float16,16383,0.013104000439246496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,float16,8191,0.012949333836634954
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,float16,8191,0.017029333859682083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,fp8,8191,0.012917333592971167
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,fp8,8191,0.016997333616018295
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,float16,16383,0.019178666174411774
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,fp8,16383,0.012810666114091873
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,fp8,16383,0.018063999712467194
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,float16,32767,0.013338666409254074
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,float16,32767,0.022042666872342426
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,fp8,32767,0.011792000383138657
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,fp8,32767,0.021104000508785248
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,float16,65535,0.012773333738247553
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,float16,65535,0.02735466758410136
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,fp8,65535,0.012693333129088083
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,fp8,65535,0.02515733242034912
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,float16,1,0.017946666727463405
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,float16,1,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,float16,131071,0.01479999969402949
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,fp8,1,0.016890666137139004
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,float16,131071,0.04442666471004486
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,fp8,1,0.016970666746298473
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,float16,3,0.017184000462293625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,64,128,1,float16,fp8,131071,0.012970666090647379
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,float16,3,0.018874666343132656
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,fp8,3,0.016927999754746754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,64,0,1,float16,fp8,131071,0.035258665680885315
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,fp8,3,0.01725333308180173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,float16,7,0.01720000058412552
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,float16,7,0.017231999586025875
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,fp8,7,0.017024000485738117
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,fp8,7,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,float16,15,0.017210666090250015
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,float16,15,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,fp8,15,0.01699200024207433
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,fp8,15,0.016832000265518825
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,float16,31,0.017173333714405697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,float16,31,0.01725333308180173
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,fp8,31,0.017173333714405697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,fp8,31,0.01720533271630605
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,float16,63,0.016869333883126576
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,float16,63,0.01889066646496455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,fp8,63,0.017279999951521557
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,fp8,63,0.017231999586025875
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,float16,127,0.01793066660563151
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,float16,127,0.01722666621208191
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,fp8,127,0.01691199963291486
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,fp8,127,0.01709866647919019
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,float16,255,0.018911999960740406
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,float16,255,0.018602666755517323
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,fp8,255,0.017103999853134155
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,fp8,255,0.01722666621208191
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,float16,511,0.01903466631968816
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,float16,511,0.02309866746266683
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,fp8,511,0.01714133347074191
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,fp8,511,0.021183999876181286
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,float16,1023,0.017103999853134155
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,float16,1023,0.0367253323396047
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,fp8,1023,0.016901332885026932
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,fp8,1023,0.0314026673634847
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,float16,2047,0.021045332153638203
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,fp8,2047,0.020992000897725422
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,fp8,2047,0.053727999329566956
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,float16,2047,0.05992533266544342
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,float16,4095,0.02096533278624217
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,1,64,128,1,float16,float16,1,0.02733866622050603
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,64,128,1,float16,fp8,4095,0.020970667401949566
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,1,64,0,1,float16,float16,1,0.026341333985328674
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,1,64,128,1,float16,fp8,1,0.025274666647116344
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,float16,4095,0.10051733255386353
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,1,64,0,1,float16,fp8,1,0.025146665672461193
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,64,0,1,float16,fp8,4095,0.08682133754094441
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,1,64,128,1,float16,float16,3,0.02735466758410136
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,1,64,0,1,float16,float16,3,0.02678400029738744
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,1,64,128,1,float16,fp8,3,0.025429333249727886
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,1,64,0,1,float16,fp8,3,0.025061334172884624
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,1,64,128,1,float16,float16,7,0.027215999861558277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,1,64,128,1,float16,fp8,7,0.02518400053183238
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,1,64,0,1,float16,fp8,7,0.02536533276240031
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,1,64,128,1,float16,float16,15,0.02737066646416982
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,1,64,0,1,float16,float16,15,0.026549334327379864
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,1,64,128,1,float16,fp8,15,0.025418666501839954
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,1,64,0,1,float16,fp8,15,0.02531733363866806
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,1,64,128,1,float16,float16,31,0.027109332382678986
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,1,64,0,1,float16,float16,31,0.0271519993742307
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,1,64,128,1,float16,fp8,31,0.02515733242034912
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,1,64,0,1,float16,fp8,31,0.02517866591612498
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,1,64,128,1,float16,float16,63,0.026741333305835724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,1,64,0,1,float16,float16,63,0.027056001126766205
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,1,64,128,1,float16,fp8,63,0.025114665428797405
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,1,64,0,1,float16,fp8,63,0.025360000630219776
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,1,64,128,1,float16,float16,127,0.02716800073782603
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,1,64,0,1,float16,float16,127,0.027109332382678986
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,1,64,128,1,float16,fp8,127,0.02514133354028066
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,1,64,0,1,float16,fp8,127,0.025098666548728943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,1,64,128,1,float16,float16,255,0.027327999472618103
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,1,64,0,1,float16,float16,255,0.027098665634791057
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,1,64,128,1,float16,fp8,255,0.025253333151340485
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,1,64,0,1,float16,fp8,255,0.025125332176685333
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,1,64,128,1,float16,float16,511,0.027237333357334137
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,1,64,0,1,float16,float16,511,0.039359999199708305
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,1,64,0,1,float16,float16,7,0.027104000250498455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,1,64,128,1,float16,fp8,511,0.025445332129796345
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,1,64,0,1,float16,fp8,511,0.035029334326585136
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,1,64,128,1,float16,float16,1023,0.02731200059254964
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,1,64,0,1,float16,float16,1023,0.05983999868233999
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,float16,1,0.009066666786869368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,1,64,128,1,float16,fp8,1023,0.025231999655564625
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,float16,1,0.009045333291093508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,1,64,0,1,float16,fp8,1023,0.053632001082102455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,fp8,1,0.009061333412925402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,fp8,1,0.008912000184257826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,float16,3,0.009093333035707474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,float16,3,0.008890666688481966
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,fp8,3,0.009039999917149544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,fp8,3,0.008976000050703684
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,1,64,128,1,float16,float16,2047,0.02932800104220708
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,float16,7,0.00895999992887179
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,float16,7,0.009141333401203156
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,1,64,128,1,float16,fp8,2047,0.028922667105992634
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,fp8,7,0.009973333527644476
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,1,64,0,1,float16,float16,2047,0.10347732901573181
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,fp8,7,0.009808000177145004
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,float16,15,0.009082666908701261
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,float16,15,0.009109333157539368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,1,64,0,1,float16,fp8,2047,0.09097599983215332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,fp8,15,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,fp8,15,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,float16,31,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,float16,31,0.008997333546479544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,fp8,31,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,fp8,31,0.009450666606426239
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,float16,63,0.010928000013033548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,float16,63,0.010858666151762009
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,fp8,63,0.010586666564146677
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,fp8,63,0.010709332923094431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,float16,255,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,float16,127,0.009082666908701261
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,float16,127,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,fp8,127,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,float16,511,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,fp8,127,0.009893333539366722
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,float16,255,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,fp8,255,0.00922133338948091
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,fp8,255,0.010965333630641302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,fp8,1023,0.010645333677530289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,float16,511,0.010842667271693548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,float16,2047,0.012655999511480331
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,fp8,511,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,fp8,511,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,float16,1023,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,float16,1023,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,fp8,1023,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,fp8,4095,0.012831999609867731
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,float16,2047,0.014560000350077948
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,fp8,2047,0.012757333616415659
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,fp8,2047,0.014805333067973455
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,float16,4095,0.012719999998807907
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,float16,4095,0.01594666639963786
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,fp8,4095,0.015210667004187902
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,float16,8191,0.011354666203260422
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,float16,8191,0.017573333034912746
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,fp8,8191,0.010944000134865442
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,fp8,8191,0.016986666868130367
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,float16,16383,0.011584000041087469
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,float16,16383,0.020949333906173706
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,fp8,16383,0.013978666315476099
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,fp8,16383,0.019685332973798115
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,float16,32767,0.011674666156371435
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,fp8,32767,0.011354666203260422
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,float16,32767,0.025514667232831318
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,fp8,32767,0.02317333221435547
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,float16,65535,0.0408746674656868
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,float16,65535,0.011461333682139715
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,float16,1,0.010048000141978264
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,fp8,65535,0.011477333803971609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,fp8,65535,0.033301333586374916
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,fp8,1,0.009952000031868616
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,float16,3,0.009946666657924652
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,float16,7,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,fp8,3,0.009701333319147428
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,fp8,7,0.009578666960199675
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,float16,15,0.010645333677530289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,fp8,15,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,float16,31,0.009253333633144697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,fp8,31,0.0106133334338665
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,float16,63,0.010672000547250112
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,float16,127,0.008992000172535578
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,fp8,63,0.010687999427318573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,float16,131071,0.014837333311637243
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,float16,131071,0.06461866696675618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,64,128,1,float16,fp8,131071,0.014831999937693277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,fp8,127,0.010255999863147736
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,float16,255,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,fp8,255,0.00955200009047985
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,float16,511,0.011045332998037338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,64,0,1,float16,fp8,131071,0.05584533512592316
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,fp8,511,0.01081066702802976
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,float16,1023,0.01181866725285848
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,fp8,1023,0.014096000542243322
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,float16,2047,0.015397333850463232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,fp8,2047,0.01509333277742068
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,float16,4095,0.01695999999841054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,fp8,4095,0.0169813334941864
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,float16,8191,0.021194666624069214
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,fp8,8191,0.019007999449968338
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,float16,16383,0.03956799954175949
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,fp8,16383,0.025114665428797405
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,float16,32767,0.059952000776926674
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,fp8,32767,0.04036800066630045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,float16,1,0.010533332824707031
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,fp8,1,0.009061333412925402
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,float16,3,0.009306666751702627
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,fp8,3,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,float16,7,0.009045333291093508
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,fp8,7,0.009381333366036415
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,fp8,15,0.00877333308259646
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,float16,15,0.010693332801262537
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,float16,65535,0.10133866469065349
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,float16,31,0.009098666409651438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,fp8,31,0.008725333337982496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,fp8,65535,0.061941335598627724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,float16,63,0.010981333752473196
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,fp8,63,0.00966933307548364
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,float16,127,0.008885333314538002
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,fp8,127,0.009759999811649323
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,float16,255,0.0106133334338665
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,fp8,511,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,float16,1023,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,fp8,255,0.008992000172535578
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,float16,511,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,fp8,1023,0.01098666712641716
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,float16,2047,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,fp8,2047,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,float16,4095,0.010885333021481832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,fp8,131071,0.10913599530855815
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,fp8,4095,0.010863999525705973
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,32,1,2,1,128,0,1,float16,float16,131071,0.18980266650517783
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,float16,8191,0.015125333021084467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,fp8,8191,0.015295999745527903
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,float16,16383,0.019199999670187633
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,fp8,16383,0.018895999838908512
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,fp8,32767,0.02740799884001414
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,float16,32767,0.027301333844661713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,float16,1,0.009125333279371262
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,float16,3,0.010645333677530289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,fp8,1,0.008752000207702318
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,fp8,3,0.010602666685978571
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,float16,65535,0.03294933338960012
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,float16,7,0.008986666798591614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,fp8,65535,0.029487999776999157
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,float16,31,0.00966933307548364
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,fp8,7,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,float16,15,0.008810666700204214
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,float16,131071,0.03608533243338267
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,fp8,15,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,fp8,31,0.009056000038981438
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,float16,63,0.009072000160813332
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,fp8,63,0.009930666536092758
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,float16,127,0.010751999914646149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,fp8,127,0.00901333304742972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1,1,2,1,128,0,1,float16,fp8,131071,0.03510933369398117
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,float16,255,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,fp8,255,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,float16,511,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,float16,4095,0.012885333349307379
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,fp8,511,0.010661333799362183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,float16,1023,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,fp8,1023,0.011029332876205444
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,float16,2047,0.01101333275437355
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,fp8,2047,0.011146667102972666
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,fp8,16383,0.021333334346612293
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,fp8,4095,0.0122079998254776
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,float16,8191,0.015301333119471868
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,fp8,8191,0.017045332739750545
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,float16,16383,0.022090665996074677
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,float16,32767,0.024058667321999867
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,fp8,32767,0.023029332359631855
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,float16,1,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,fp8,1,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,float16,65535,0.02712533374627431
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,float16,7,0.01090666651725769
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,fp8,7,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,float16,3,0.010992000500361124
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,fp8,65535,0.02370133250951767
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,fp8,3,0.010784000158309937
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,float16,15,0.010885333021481832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,float16,63,0.010138666878143946
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,fp8,15,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,float16,31,0.010213333492477735
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,fp8,31,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,float16,131071,0.031157332162062328
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,fp8,63,0.010656000425418219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,float16,127,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2,1,2,1,128,0,1,float16,fp8,131071,0.0271573339899381
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,fp8,127,0.010415999839703241
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,float16,255,0.01090666651725769
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,fp8,255,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,float16,511,0.011194666226704916
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,fp8,511,0.011152000476916632
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,float16,1023,0.012928000340859095
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,fp8,1023,0.011136000355084738
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,float16,2047,0.01695466662446658
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,fp8,2047,0.01681600014368693
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,float16,4095,0.020960000654061634
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,fp8,4095,0.019199999670187633
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,float16,8191,0.039359999199708305
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,fp8,8191,0.02518933266401291
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,float16,16383,0.06085866689682007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,fp8,16383,0.040074666341145836
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,float16,1,0.00914666677514712
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,float16,32767,0.10155733426411946
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,fp8,1,0.009029333169261614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,fp8,32767,0.0624533345301946
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,float16,3,0.009109333157539368
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,fp8,3,0.009050666665037474
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,float16,7,0.008986666798591614
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,fp8,7,0.010837333897749582
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,float16,15,0.009770666559537252
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,fp8,15,0.009077333534757296
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,float16,31,0.01080000028014183
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,fp8,31,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,float16,63,0.008837333569924036
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,fp8,63,0.010650667051474253
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,float16,127,0.00980266680320104
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,fp8,127,0.010816000401973724
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,float16,255,0.010677333921194077
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,fp8,255,0.010832000523805618
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,float16,65535,0.18616533279418945
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,float16,511,0.010773333410422007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,fp8,511,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,float16,1023,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,fp8,65535,0.10559999942779541
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,fp8,1023,0.010847999403874079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,float16,2047,0.012826666235923767
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,fp8,2047,0.013104000439246496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,fp8,4095,0.012949333836634954
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,float16,4095,0.013151999562978745
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,float16,8191,0.01823466643691063
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,fp8,8191,0.01878400022784869
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,fp8,16383,0.019061333189407986
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,float16,16383,0.020975999534130096
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,float16,32767,0.02333866556485494
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,fp8,32767,0.02130666623512904
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,float16,1,0.010698666175206503
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,float16,65535,0.025194667279720306
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,fp8,65535,0.023018665611743927
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,fp8,1,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,float16,3,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,fp8,3,0.010757333288590113
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,float16,7,0.010821333775917688
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,fp8,7,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,float16,15,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,float16,31,0.010741333166758219
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,fp8,15,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,fp8,131071,0.1954186757405599
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,fp8,31,0.010735999792814255
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,64,1,2,1,128,0,1,float16,float16,131071,0.35574932893117267
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,float16,63,0.01090666651725769
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,fp8,127,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,fp8,63,0.011050666371981302
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,float16,127,0.010938666760921478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,fp8,255,0.010853332777818045
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,float16,131071,0.046869332591692604
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,4,1,2,1,128,0,1,float16,fp8,131071,0.029461334149042766
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,fp8,511,0.011039999624093374
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,float16,255,0.010933333386977514
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,float16,511,0.012458667159080505
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,float16,1023,0.013893333574136099
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,fp8,1023,0.012784000486135483
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,fp8,4095,0.0232640008131663
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,float16,2047,0.02124800036350886
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,fp8,2047,0.018288000176350277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,float16,4095,0.03903999924659729
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,float16,8191,0.05985066791375478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,fp8,8191,0.03955200066169103
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,float16,16383,0.10129599769910176
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,float16,1,0.01267733300725619
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,fp8,16383,0.06018133461475372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,fp8,1,0.01192533348997434
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,float16,3,0.012858666479587555
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,fp8,3,0.011061333119869232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,float16,7,0.012826666235923767
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,fp8,7,0.011717333147923151
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,float16,15,0.012863999853531519
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,fp8,15,0.011055999745925268
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,float16,32767,0.184714674949646
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,float16,31,0.012165332833925882
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,fp8,31,0.011936000237862269
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,fp8,32767,0.10290666421254475
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,float16,63,0.012709333250919977
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,fp8,63,0.01099733387430509
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,float16,127,0.012383999923865
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,fp8,127,0.010960000256697336
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,float16,255,0.012784000486135483
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,fp8,255,0.011626667032639185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,float16,511,0.013744000345468521
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,fp8,1023,0.014991999914248785
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,fp8,511,0.013167999684810638
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,float16,2047,0.03982933362325033
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,float16,1023,0.017909333109855652
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,fp8,2047,0.025461333493391674
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,float16,4095,0.061146666606267296
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,fp8,4095,0.040074666341145836
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,float16,8191,0.10289067029953003
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,float16,1,0.014746667196353277
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,fp8,8191,0.061887999375661217
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,fp8,1,0.014922666052977243
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,float16,3,0.014698666830857595
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,float16,65535,0.3516853253046672
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,fp8,3,0.013082666943470636
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,128,1,2,1,128,0,1,float16,fp8,65535,0.18732800086339316
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,float16,16383,0.18615466356277466
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,float16,7,0.014890667051076889
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,256,1,2,1,128,0,1,float16,fp8,16383,0.10495466987291972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,fp8,7,0.014778666198253632
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,float16,15,0.014991999914248785
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,fp8,15,0.01333333303531011
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,float16,31,0.014826666563749313
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,fp8,31,0.013194666554530462
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,float16,63,0.014885333677132925
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,fp8,63,0.0129120002190272
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,float16,127,0.014869333555301031
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,fp8,127,0.013104000439246496
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,float16,255,0.014874666929244995
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,fp8,255,0.013760000467300415
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,float16,511,0.019146667172511418
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,float16,1023,0.03400533397992452
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,fp8,511,0.01724799970785777
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,fp8,1023,0.023205332458019257
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,float16,2047,0.0611413319905599
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,fp8,2047,0.03958400090535482
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,float16,1,0.009088000282645226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,fp8,1,0.009717333440979322
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,float16,3,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,fp8,3,0.008826666822036108
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,float16,7,0.00902399979531765
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,float16,4095,0.10353066523869832
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,fp8,4095,0.06393066545327504
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,fp8,7,0.010778666784365972
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,float16,15,0.009130666653315226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,float16,31,0.009039999917149544
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,fp8,63,0.010805333654085795
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,fp8,15,0.009712000067035357
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,fp8,31,0.010762666662534079
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,float16,63,0.010768000036478043
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,float16,127,0.01073066641887029
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,float16,8191,0.18742400407791138
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,fp8,127,0.009258666386206945
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,float16,255,0.010703999549150467
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,512,1,2,1,128,0,1,float16,fp8,8191,0.10717866818110149
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,fp8,1023,0.010928000013033548
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,fp8,255,0.010666667173306147
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,float16,511,0.010746666540702185
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,float16,4095,0.016842667013406754
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,fp8,4095,0.015135999768972397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,fp8,511,0.011002667248249054
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,float16,1023,0.010949333508809408
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,fp8,2047,0.012837332983811697
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,fp8,16383,0.01893866683046023
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,float16,2047,0.013077333569526672
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,float16,8191,0.018933333456516266
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,fp8,8191,0.017887999614079792
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,float16,16383,0.020928000410397846
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,float16,32767,0.02518400053183238
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,fp8,32767,0.021882665654023487
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,float16,65535,0.04267199834187826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,fp8,65535,0.025290665527184803
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,float16,1,0.01931200052301089
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,fp8,1,0.018917333334684372
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,float16,3,0.02027200038234393
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,fp8,3,0.017653333644072216
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,float16,7,0.019573333362738293
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,fp8,7,0.017136000096797943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,float16,15,0.019237333287795384
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,fp8,15,0.018842666099468868
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,float16,31,0.019946667055288952
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,fp8,131071,0.04594666759173075
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,fp8,31,0.018816000471512478
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,8,1,2,1,128,0,1,float16,float16,131071,0.06566399832566579
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,float16,63,0.01966399947802226
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,fp8,63,0.0170666662355264
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,float16,127,0.019023999571800232
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,fp8,127,0.01844800015290578
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,float16,255,0.01969066634774208
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,fp8,255,0.01793066660563151
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,float16,511,0.03369600077470144
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,fp8,511,0.02311466634273529
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,float16,1023,0.053642665346463524
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,fp8,1023,0.035642666121323906
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,fp8,2047,0.059749335050582886
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,1,128,0,1,float16,float16,1,0.02958400050799052
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,1,128,0,1,float16,fp8,1,0.027306665976842243
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,float16,2047,0.09915733337402344
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,1,128,0,1,float16,float16,3,0.029498666524887085
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,1,128,0,1,float16,fp8,3,0.027002667387326557
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,fp8,4095,0.09882666667302449
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,1,128,0,1,float16,float16,7,0.029520000020662945
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,1,128,0,1,float16,fp8,7,0.02722666660944621
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,1,128,0,1,float16,float16,15,0.02956799914439519
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,1,128,0,1,float16,fp8,15,0.025546667476495106
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,1,128,0,1,float16,fp8,31,0.025536000728607178
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,1024,1,2,1,128,0,1,float16,float16,4095,0.1778293251991272
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,1,128,0,1,float16,float16,63,0.02921066681543986
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,1,128,0,1,float16,fp8,63,0.027072000006834667
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,1,128,0,1,float16,float16,31,0.030154667794704437
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,1,128,0,1,float16,float16,127,0.02918400118748347
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,1,128,0,1,float16,fp8,127,0.0272533322374026
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,1,128,0,1,float16,float16,255,0.03363200028737386
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,1,128,0,1,float16,fp8,255,0.025407999753952026
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,float16,1,0.009136000027259191
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,1,128,0,1,float16,float16,511,0.054431999723116554
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,fp8,1,0.008965333302815756
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,1,128,0,1,float16,fp8,511,0.03922666609287262
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,float16,3,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,fp8,3,0.009343999748428663
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,float16,7,0.010645333677530289
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,fp8,7,0.009775999933481216
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,float16,15,0.009103999783595404
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,fp8,15,0.010794666906197866
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,1,128,0,1,float16,fp8,1023,0.05819733440876007
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,float16,31,0.00927466650803884
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,fp8,31,0.009216000015536943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,float16,63,0.01062400018175443
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,fp8,63,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,float16,127,0.008954666554927826
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,fp8,127,0.008896000062425932
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,1,128,0,1,float16,float16,1023,0.09402666489283244
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,1,128,0,1,float16,float16,2047,0.17755732933680216
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,float16,255,0.010714666297038397
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,fp8,255,0.01071999967098236
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,2048,1,2,1,128,0,1,float16,fp8,2047,0.10059733192125957
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,float16,511,0.010826667149861654
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,float16,2047,0.015109332899252573
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,fp8,511,0.010682666053374609
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,fp8,1023,0.01091733326514562
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,float16,1023,0.011007999380429586
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,fp8,2047,0.014917333920796713
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,float16,4095,0.017263999829689663
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,fp8,4095,0.016858667135238647
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,float16,8191,0.020015999674797058
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,fp8,8191,0.017136000096797943
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,float16,16383,0.023205332458019257
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,fp8,16383,0.021002667645613354
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,float16,32767,0.04142399877309799
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,fp8,32767,0.025487999121348064
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,float16,65535,0.06196266909440359
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,fp8,65535,0.04192533095677694
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,float16,131071,0.10351999600728352
TRTLLM,1.2.0rc5,NVIDIA B200,generation_attention,torch_flow,16,1,2,1,128,0,1,float16,fp8,131071,0.06377600133419037
